f7386e856888630fa0d28ef007c5e52251a5eaeb
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: ReactionField
54  * VdW interaction:            CubicSplineTable
55  * Geometry:                   Water3-Water3
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
71      * jnr indices whose data are put in the two positions of the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     int              nvdwtype;
105     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106     int              *vdwtype;
107     real             *vdwparam;
108     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
109     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
111     real             *vftab;
112     _fjsp_v2r8       itab_tmp;
113     _fjsp_v2r8       dummy_mask,cutoff_mask;
114     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
115     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
116     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
117
118     x                = xx[0];
119     f                = ff[0];
120
121     nri              = nlist->nri;
122     iinr             = nlist->iinr;
123     jindex           = nlist->jindex;
124     jjnr             = nlist->jjnr;
125     shiftidx         = nlist->shift;
126     gid              = nlist->gid;
127     shiftvec         = fr->shift_vec[0];
128     fshift           = fr->fshift[0];
129     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
130     charge           = mdatoms->chargeA;
131     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
132     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
133     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
134     nvdwtype         = fr->ntype;
135     vdwparam         = fr->nbfp;
136     vdwtype          = mdatoms->typeA;
137
138     vftab            = kernel_data->table_vdw->data;
139     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
140
141     /* Setup water-specific parameters */
142     inr              = nlist->iinr[0];
143     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
144     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
145     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
146     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
147
148     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
149     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
150     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
151     vdwjidx0A        = 2*vdwtype[inr+0];
152     qq00             = _fjsp_mul_v2r8(iq0,jq0);
153     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
154     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
155     qq01             = _fjsp_mul_v2r8(iq0,jq1);
156     qq02             = _fjsp_mul_v2r8(iq0,jq2);
157     qq10             = _fjsp_mul_v2r8(iq1,jq0);
158     qq11             = _fjsp_mul_v2r8(iq1,jq1);
159     qq12             = _fjsp_mul_v2r8(iq1,jq2);
160     qq20             = _fjsp_mul_v2r8(iq2,jq0);
161     qq21             = _fjsp_mul_v2r8(iq2,jq1);
162     qq22             = _fjsp_mul_v2r8(iq2,jq2);
163
164     /* Avoid stupid compiler warnings */
165     jnrA = jnrB = 0;
166     j_coord_offsetA = 0;
167     j_coord_offsetB = 0;
168
169     outeriter        = 0;
170     inneriter        = 0;
171
172     /* Start outer loop over neighborlists */
173     for(iidx=0; iidx<nri; iidx++)
174     {
175         /* Load shift vector for this list */
176         i_shift_offset   = DIM*shiftidx[iidx];
177
178         /* Load limits for loop over neighbors */
179         j_index_start    = jindex[iidx];
180         j_index_end      = jindex[iidx+1];
181
182         /* Get outer coordinate index */
183         inr              = iinr[iidx];
184         i_coord_offset   = DIM*inr;
185
186         /* Load i particle coords and add shift vector */
187         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
188                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
189
190         fix0             = _fjsp_setzero_v2r8();
191         fiy0             = _fjsp_setzero_v2r8();
192         fiz0             = _fjsp_setzero_v2r8();
193         fix1             = _fjsp_setzero_v2r8();
194         fiy1             = _fjsp_setzero_v2r8();
195         fiz1             = _fjsp_setzero_v2r8();
196         fix2             = _fjsp_setzero_v2r8();
197         fiy2             = _fjsp_setzero_v2r8();
198         fiz2             = _fjsp_setzero_v2r8();
199
200         /* Reset potential sums */
201         velecsum         = _fjsp_setzero_v2r8();
202         vvdwsum          = _fjsp_setzero_v2r8();
203
204         /* Start inner kernel loop */
205         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
206         {
207
208             /* Get j neighbor index, and coordinate index */
209             jnrA             = jjnr[jidx];
210             jnrB             = jjnr[jidx+1];
211             j_coord_offsetA  = DIM*jnrA;
212             j_coord_offsetB  = DIM*jnrB;
213
214             /* load j atom coordinates */
215             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
216                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
217
218             /* Calculate displacement vector */
219             dx00             = _fjsp_sub_v2r8(ix0,jx0);
220             dy00             = _fjsp_sub_v2r8(iy0,jy0);
221             dz00             = _fjsp_sub_v2r8(iz0,jz0);
222             dx01             = _fjsp_sub_v2r8(ix0,jx1);
223             dy01             = _fjsp_sub_v2r8(iy0,jy1);
224             dz01             = _fjsp_sub_v2r8(iz0,jz1);
225             dx02             = _fjsp_sub_v2r8(ix0,jx2);
226             dy02             = _fjsp_sub_v2r8(iy0,jy2);
227             dz02             = _fjsp_sub_v2r8(iz0,jz2);
228             dx10             = _fjsp_sub_v2r8(ix1,jx0);
229             dy10             = _fjsp_sub_v2r8(iy1,jy0);
230             dz10             = _fjsp_sub_v2r8(iz1,jz0);
231             dx11             = _fjsp_sub_v2r8(ix1,jx1);
232             dy11             = _fjsp_sub_v2r8(iy1,jy1);
233             dz11             = _fjsp_sub_v2r8(iz1,jz1);
234             dx12             = _fjsp_sub_v2r8(ix1,jx2);
235             dy12             = _fjsp_sub_v2r8(iy1,jy2);
236             dz12             = _fjsp_sub_v2r8(iz1,jz2);
237             dx20             = _fjsp_sub_v2r8(ix2,jx0);
238             dy20             = _fjsp_sub_v2r8(iy2,jy0);
239             dz20             = _fjsp_sub_v2r8(iz2,jz0);
240             dx21             = _fjsp_sub_v2r8(ix2,jx1);
241             dy21             = _fjsp_sub_v2r8(iy2,jy1);
242             dz21             = _fjsp_sub_v2r8(iz2,jz1);
243             dx22             = _fjsp_sub_v2r8(ix2,jx2);
244             dy22             = _fjsp_sub_v2r8(iy2,jy2);
245             dz22             = _fjsp_sub_v2r8(iz2,jz2);
246
247             /* Calculate squared distance and things based on it */
248             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
249             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
250             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
251             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
252             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
253             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
254             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
255             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
256             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
257
258             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
259             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
260             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
261             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
262             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
263             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
264             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
265             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
266             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
267
268             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
269             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
270             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
271             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
272             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
273             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
274             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
275             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
276             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
277
278             fjx0             = _fjsp_setzero_v2r8();
279             fjy0             = _fjsp_setzero_v2r8();
280             fjz0             = _fjsp_setzero_v2r8();
281             fjx1             = _fjsp_setzero_v2r8();
282             fjy1             = _fjsp_setzero_v2r8();
283             fjz1             = _fjsp_setzero_v2r8();
284             fjx2             = _fjsp_setzero_v2r8();
285             fjy2             = _fjsp_setzero_v2r8();
286             fjz2             = _fjsp_setzero_v2r8();
287
288             /**************************
289              * CALCULATE INTERACTIONS *
290              **************************/
291
292             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
293
294             /* Calculate table index by multiplying r with table scale and truncate to integer */
295             rt               = _fjsp_mul_v2r8(r00,vftabscale);
296             itab_tmp         = _fjsp_dtox_v2r8(rt);
297             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
298             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
299             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
300
301             vfconv.i[0]     *= 8;
302             vfconv.i[1]     *= 8;
303
304             /* REACTION-FIELD ELECTROSTATICS */
305             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
306             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
307
308             /* CUBIC SPLINE TABLE DISPERSION */
309             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
310             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
311             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
312             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
313             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
314             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
315             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
316             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
317             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
318             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
319             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
320
321             /* CUBIC SPLINE TABLE REPULSION */
322             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
323             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
324             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
325             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
326             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
327             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
328             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
329             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
330             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
331             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
332             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
333             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
334             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
335
336             /* Update potential sum for this i atom from the interaction with this j atom. */
337             velecsum         = _fjsp_add_v2r8(velecsum,velec);
338             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
339
340             fscal            = _fjsp_add_v2r8(felec,fvdw);
341
342             /* Update vectorial force */
343             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
344             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
345             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
346             
347             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
348             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
349             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
350
351             /**************************
352              * CALCULATE INTERACTIONS *
353              **************************/
354
355             /* REACTION-FIELD ELECTROSTATICS */
356             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
357             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
358
359             /* Update potential sum for this i atom from the interaction with this j atom. */
360             velecsum         = _fjsp_add_v2r8(velecsum,velec);
361
362             fscal            = felec;
363
364             /* Update vectorial force */
365             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
366             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
367             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
368             
369             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
370             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
371             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
372
373             /**************************
374              * CALCULATE INTERACTIONS *
375              **************************/
376
377             /* REACTION-FIELD ELECTROSTATICS */
378             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
379             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
380
381             /* Update potential sum for this i atom from the interaction with this j atom. */
382             velecsum         = _fjsp_add_v2r8(velecsum,velec);
383
384             fscal            = felec;
385
386             /* Update vectorial force */
387             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
388             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
389             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
390             
391             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
392             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
393             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
394
395             /**************************
396              * CALCULATE INTERACTIONS *
397              **************************/
398
399             /* REACTION-FIELD ELECTROSTATICS */
400             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
401             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
402
403             /* Update potential sum for this i atom from the interaction with this j atom. */
404             velecsum         = _fjsp_add_v2r8(velecsum,velec);
405
406             fscal            = felec;
407
408             /* Update vectorial force */
409             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
410             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
411             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
412             
413             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
414             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
415             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
416
417             /**************************
418              * CALCULATE INTERACTIONS *
419              **************************/
420
421             /* REACTION-FIELD ELECTROSTATICS */
422             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
423             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
424
425             /* Update potential sum for this i atom from the interaction with this j atom. */
426             velecsum         = _fjsp_add_v2r8(velecsum,velec);
427
428             fscal            = felec;
429
430             /* Update vectorial force */
431             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
432             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
433             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
434             
435             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
436             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
437             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
438
439             /**************************
440              * CALCULATE INTERACTIONS *
441              **************************/
442
443             /* REACTION-FIELD ELECTROSTATICS */
444             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
445             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
446
447             /* Update potential sum for this i atom from the interaction with this j atom. */
448             velecsum         = _fjsp_add_v2r8(velecsum,velec);
449
450             fscal            = felec;
451
452             /* Update vectorial force */
453             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
454             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
455             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
456             
457             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
458             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
459             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
460
461             /**************************
462              * CALCULATE INTERACTIONS *
463              **************************/
464
465             /* REACTION-FIELD ELECTROSTATICS */
466             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
467             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
468
469             /* Update potential sum for this i atom from the interaction with this j atom. */
470             velecsum         = _fjsp_add_v2r8(velecsum,velec);
471
472             fscal            = felec;
473
474             /* Update vectorial force */
475             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
476             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
477             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
478             
479             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
480             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
481             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
482
483             /**************************
484              * CALCULATE INTERACTIONS *
485              **************************/
486
487             /* REACTION-FIELD ELECTROSTATICS */
488             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
489             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
490
491             /* Update potential sum for this i atom from the interaction with this j atom. */
492             velecsum         = _fjsp_add_v2r8(velecsum,velec);
493
494             fscal            = felec;
495
496             /* Update vectorial force */
497             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
498             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
499             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
500             
501             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
502             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
503             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
504
505             /**************************
506              * CALCULATE INTERACTIONS *
507              **************************/
508
509             /* REACTION-FIELD ELECTROSTATICS */
510             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
511             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
512
513             /* Update potential sum for this i atom from the interaction with this j atom. */
514             velecsum         = _fjsp_add_v2r8(velecsum,velec);
515
516             fscal            = felec;
517
518             /* Update vectorial force */
519             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
520             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
521             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
522             
523             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
524             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
525             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
526
527             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
528
529             /* Inner loop uses 350 flops */
530         }
531
532         if(jidx<j_index_end)
533         {
534
535             jnrA             = jjnr[jidx];
536             j_coord_offsetA  = DIM*jnrA;
537
538             /* load j atom coordinates */
539             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
540                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
541
542             /* Calculate displacement vector */
543             dx00             = _fjsp_sub_v2r8(ix0,jx0);
544             dy00             = _fjsp_sub_v2r8(iy0,jy0);
545             dz00             = _fjsp_sub_v2r8(iz0,jz0);
546             dx01             = _fjsp_sub_v2r8(ix0,jx1);
547             dy01             = _fjsp_sub_v2r8(iy0,jy1);
548             dz01             = _fjsp_sub_v2r8(iz0,jz1);
549             dx02             = _fjsp_sub_v2r8(ix0,jx2);
550             dy02             = _fjsp_sub_v2r8(iy0,jy2);
551             dz02             = _fjsp_sub_v2r8(iz0,jz2);
552             dx10             = _fjsp_sub_v2r8(ix1,jx0);
553             dy10             = _fjsp_sub_v2r8(iy1,jy0);
554             dz10             = _fjsp_sub_v2r8(iz1,jz0);
555             dx11             = _fjsp_sub_v2r8(ix1,jx1);
556             dy11             = _fjsp_sub_v2r8(iy1,jy1);
557             dz11             = _fjsp_sub_v2r8(iz1,jz1);
558             dx12             = _fjsp_sub_v2r8(ix1,jx2);
559             dy12             = _fjsp_sub_v2r8(iy1,jy2);
560             dz12             = _fjsp_sub_v2r8(iz1,jz2);
561             dx20             = _fjsp_sub_v2r8(ix2,jx0);
562             dy20             = _fjsp_sub_v2r8(iy2,jy0);
563             dz20             = _fjsp_sub_v2r8(iz2,jz0);
564             dx21             = _fjsp_sub_v2r8(ix2,jx1);
565             dy21             = _fjsp_sub_v2r8(iy2,jy1);
566             dz21             = _fjsp_sub_v2r8(iz2,jz1);
567             dx22             = _fjsp_sub_v2r8(ix2,jx2);
568             dy22             = _fjsp_sub_v2r8(iy2,jy2);
569             dz22             = _fjsp_sub_v2r8(iz2,jz2);
570
571             /* Calculate squared distance and things based on it */
572             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
573             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
574             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
575             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
576             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
577             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
578             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
579             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
580             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
581
582             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
583             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
584             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
585             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
586             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
587             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
588             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
589             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
590             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
591
592             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
593             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
594             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
595             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
596             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
597             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
598             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
599             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
600             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
601
602             fjx0             = _fjsp_setzero_v2r8();
603             fjy0             = _fjsp_setzero_v2r8();
604             fjz0             = _fjsp_setzero_v2r8();
605             fjx1             = _fjsp_setzero_v2r8();
606             fjy1             = _fjsp_setzero_v2r8();
607             fjz1             = _fjsp_setzero_v2r8();
608             fjx2             = _fjsp_setzero_v2r8();
609             fjy2             = _fjsp_setzero_v2r8();
610             fjz2             = _fjsp_setzero_v2r8();
611
612             /**************************
613              * CALCULATE INTERACTIONS *
614              **************************/
615
616             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
617
618             /* Calculate table index by multiplying r with table scale and truncate to integer */
619             rt               = _fjsp_mul_v2r8(r00,vftabscale);
620             itab_tmp         = _fjsp_dtox_v2r8(rt);
621             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
622             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
623             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
624
625             vfconv.i[0]     *= 8;
626             vfconv.i[1]     *= 8;
627
628             /* REACTION-FIELD ELECTROSTATICS */
629             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
630             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
631
632             /* CUBIC SPLINE TABLE DISPERSION */
633             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
634             F                = _fjsp_setzero_v2r8();
635             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
636             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
637             H                = _fjsp_setzero_v2r8();
638             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
639             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
640             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
641             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
642             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
643             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
644
645             /* CUBIC SPLINE TABLE REPULSION */
646             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
647             F                = _fjsp_setzero_v2r8();
648             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
649             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
650             H                = _fjsp_setzero_v2r8();
651             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
652             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
653             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
654             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
655             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
656             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
657             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
658             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
659
660             /* Update potential sum for this i atom from the interaction with this j atom. */
661             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
662             velecsum         = _fjsp_add_v2r8(velecsum,velec);
663             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
664             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
665
666             fscal            = _fjsp_add_v2r8(felec,fvdw);
667
668             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
669
670             /* Update vectorial force */
671             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
672             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
673             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
674             
675             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
676             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
677             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
678
679             /**************************
680              * CALCULATE INTERACTIONS *
681              **************************/
682
683             /* REACTION-FIELD ELECTROSTATICS */
684             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
685             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
686
687             /* Update potential sum for this i atom from the interaction with this j atom. */
688             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
689             velecsum         = _fjsp_add_v2r8(velecsum,velec);
690
691             fscal            = felec;
692
693             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
694
695             /* Update vectorial force */
696             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
697             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
698             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
699             
700             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
701             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
702             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
703
704             /**************************
705              * CALCULATE INTERACTIONS *
706              **************************/
707
708             /* REACTION-FIELD ELECTROSTATICS */
709             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
710             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
711
712             /* Update potential sum for this i atom from the interaction with this j atom. */
713             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
714             velecsum         = _fjsp_add_v2r8(velecsum,velec);
715
716             fscal            = felec;
717
718             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
719
720             /* Update vectorial force */
721             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
722             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
723             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
724             
725             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
726             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
727             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
728
729             /**************************
730              * CALCULATE INTERACTIONS *
731              **************************/
732
733             /* REACTION-FIELD ELECTROSTATICS */
734             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
735             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
736
737             /* Update potential sum for this i atom from the interaction with this j atom. */
738             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
739             velecsum         = _fjsp_add_v2r8(velecsum,velec);
740
741             fscal            = felec;
742
743             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
744
745             /* Update vectorial force */
746             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
747             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
748             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
749             
750             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
751             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
752             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
753
754             /**************************
755              * CALCULATE INTERACTIONS *
756              **************************/
757
758             /* REACTION-FIELD ELECTROSTATICS */
759             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
760             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
761
762             /* Update potential sum for this i atom from the interaction with this j atom. */
763             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
764             velecsum         = _fjsp_add_v2r8(velecsum,velec);
765
766             fscal            = felec;
767
768             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
769
770             /* Update vectorial force */
771             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
772             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
773             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
774             
775             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
776             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
777             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
778
779             /**************************
780              * CALCULATE INTERACTIONS *
781              **************************/
782
783             /* REACTION-FIELD ELECTROSTATICS */
784             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
785             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
786
787             /* Update potential sum for this i atom from the interaction with this j atom. */
788             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
789             velecsum         = _fjsp_add_v2r8(velecsum,velec);
790
791             fscal            = felec;
792
793             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
794
795             /* Update vectorial force */
796             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
797             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
798             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
799             
800             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
801             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
802             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
803
804             /**************************
805              * CALCULATE INTERACTIONS *
806              **************************/
807
808             /* REACTION-FIELD ELECTROSTATICS */
809             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
810             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
811
812             /* Update potential sum for this i atom from the interaction with this j atom. */
813             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
814             velecsum         = _fjsp_add_v2r8(velecsum,velec);
815
816             fscal            = felec;
817
818             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
819
820             /* Update vectorial force */
821             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
822             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
823             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
824             
825             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
826             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
827             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
828
829             /**************************
830              * CALCULATE INTERACTIONS *
831              **************************/
832
833             /* REACTION-FIELD ELECTROSTATICS */
834             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
835             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
836
837             /* Update potential sum for this i atom from the interaction with this j atom. */
838             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
839             velecsum         = _fjsp_add_v2r8(velecsum,velec);
840
841             fscal            = felec;
842
843             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
844
845             /* Update vectorial force */
846             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
847             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
848             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
849             
850             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
851             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
852             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
853
854             /**************************
855              * CALCULATE INTERACTIONS *
856              **************************/
857
858             /* REACTION-FIELD ELECTROSTATICS */
859             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
860             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
861
862             /* Update potential sum for this i atom from the interaction with this j atom. */
863             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
864             velecsum         = _fjsp_add_v2r8(velecsum,velec);
865
866             fscal            = felec;
867
868             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
869
870             /* Update vectorial force */
871             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
872             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
873             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
874             
875             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
876             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
877             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
878
879             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
880
881             /* Inner loop uses 350 flops */
882         }
883
884         /* End of innermost loop */
885
886         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
887                                               f+i_coord_offset,fshift+i_shift_offset);
888
889         ggid                        = gid[iidx];
890         /* Update potential energies */
891         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
892         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
893
894         /* Increment number of inner iterations */
895         inneriter                  += j_index_end - j_index_start;
896
897         /* Outer loop uses 20 flops */
898     }
899
900     /* Increment number of outer iterations */
901     outeriter        += nri;
902
903     /* Update outer/inner flops */
904
905     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*350);
906 }
907 /*
908  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
909  * Electrostatics interaction: ReactionField
910  * VdW interaction:            CubicSplineTable
911  * Geometry:                   Water3-Water3
912  * Calculate force/pot:        Force
913  */
914 void
915 nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
916                     (t_nblist                    * gmx_restrict       nlist,
917                      rvec                        * gmx_restrict          xx,
918                      rvec                        * gmx_restrict          ff,
919                      t_forcerec                  * gmx_restrict          fr,
920                      t_mdatoms                   * gmx_restrict     mdatoms,
921                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
922                      t_nrnb                      * gmx_restrict        nrnb)
923 {
924     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
925      * just 0 for non-waters.
926      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
927      * jnr indices corresponding to data put in the two positions in the SIMD register.
928      */
929     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
930     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
931     int              jnrA,jnrB;
932     int              j_coord_offsetA,j_coord_offsetB;
933     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
934     real             rcutoff_scalar;
935     real             *shiftvec,*fshift,*x,*f;
936     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
937     int              vdwioffset0;
938     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
939     int              vdwioffset1;
940     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
941     int              vdwioffset2;
942     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
943     int              vdwjidx0A,vdwjidx0B;
944     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
945     int              vdwjidx1A,vdwjidx1B;
946     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
947     int              vdwjidx2A,vdwjidx2B;
948     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
949     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
950     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
951     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
952     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
953     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
954     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
955     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
956     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
957     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
958     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
959     real             *charge;
960     int              nvdwtype;
961     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
962     int              *vdwtype;
963     real             *vdwparam;
964     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
965     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
966     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
967     real             *vftab;
968     _fjsp_v2r8       itab_tmp;
969     _fjsp_v2r8       dummy_mask,cutoff_mask;
970     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
971     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
972     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
973
974     x                = xx[0];
975     f                = ff[0];
976
977     nri              = nlist->nri;
978     iinr             = nlist->iinr;
979     jindex           = nlist->jindex;
980     jjnr             = nlist->jjnr;
981     shiftidx         = nlist->shift;
982     gid              = nlist->gid;
983     shiftvec         = fr->shift_vec[0];
984     fshift           = fr->fshift[0];
985     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
986     charge           = mdatoms->chargeA;
987     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
988     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
989     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
990     nvdwtype         = fr->ntype;
991     vdwparam         = fr->nbfp;
992     vdwtype          = mdatoms->typeA;
993
994     vftab            = kernel_data->table_vdw->data;
995     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
996
997     /* Setup water-specific parameters */
998     inr              = nlist->iinr[0];
999     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1000     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1001     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1002     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1003
1004     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1005     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1006     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1007     vdwjidx0A        = 2*vdwtype[inr+0];
1008     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1009     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1010     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1011     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1012     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1013     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1014     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1015     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1016     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1017     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1018     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1019
1020     /* Avoid stupid compiler warnings */
1021     jnrA = jnrB = 0;
1022     j_coord_offsetA = 0;
1023     j_coord_offsetB = 0;
1024
1025     outeriter        = 0;
1026     inneriter        = 0;
1027
1028     /* Start outer loop over neighborlists */
1029     for(iidx=0; iidx<nri; iidx++)
1030     {
1031         /* Load shift vector for this list */
1032         i_shift_offset   = DIM*shiftidx[iidx];
1033
1034         /* Load limits for loop over neighbors */
1035         j_index_start    = jindex[iidx];
1036         j_index_end      = jindex[iidx+1];
1037
1038         /* Get outer coordinate index */
1039         inr              = iinr[iidx];
1040         i_coord_offset   = DIM*inr;
1041
1042         /* Load i particle coords and add shift vector */
1043         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1044                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1045
1046         fix0             = _fjsp_setzero_v2r8();
1047         fiy0             = _fjsp_setzero_v2r8();
1048         fiz0             = _fjsp_setzero_v2r8();
1049         fix1             = _fjsp_setzero_v2r8();
1050         fiy1             = _fjsp_setzero_v2r8();
1051         fiz1             = _fjsp_setzero_v2r8();
1052         fix2             = _fjsp_setzero_v2r8();
1053         fiy2             = _fjsp_setzero_v2r8();
1054         fiz2             = _fjsp_setzero_v2r8();
1055
1056         /* Start inner kernel loop */
1057         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1058         {
1059
1060             /* Get j neighbor index, and coordinate index */
1061             jnrA             = jjnr[jidx];
1062             jnrB             = jjnr[jidx+1];
1063             j_coord_offsetA  = DIM*jnrA;
1064             j_coord_offsetB  = DIM*jnrB;
1065
1066             /* load j atom coordinates */
1067             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1068                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1069
1070             /* Calculate displacement vector */
1071             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1072             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1073             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1074             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1075             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1076             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1077             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1078             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1079             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1080             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1081             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1082             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1083             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1084             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1085             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1086             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1087             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1088             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1089             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1090             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1091             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1092             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1093             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1094             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1095             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1096             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1097             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1098
1099             /* Calculate squared distance and things based on it */
1100             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1101             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1102             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1103             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1104             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1105             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1106             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1107             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1108             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1109
1110             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1111             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1112             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1113             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1114             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1115             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1116             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1117             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1118             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1119
1120             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1121             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1122             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1123             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1124             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1125             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1126             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1127             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1128             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1129
1130             fjx0             = _fjsp_setzero_v2r8();
1131             fjy0             = _fjsp_setzero_v2r8();
1132             fjz0             = _fjsp_setzero_v2r8();
1133             fjx1             = _fjsp_setzero_v2r8();
1134             fjy1             = _fjsp_setzero_v2r8();
1135             fjz1             = _fjsp_setzero_v2r8();
1136             fjx2             = _fjsp_setzero_v2r8();
1137             fjy2             = _fjsp_setzero_v2r8();
1138             fjz2             = _fjsp_setzero_v2r8();
1139
1140             /**************************
1141              * CALCULATE INTERACTIONS *
1142              **************************/
1143
1144             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1145
1146             /* Calculate table index by multiplying r with table scale and truncate to integer */
1147             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1148             itab_tmp         = _fjsp_dtox_v2r8(rt);
1149             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1150             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1151             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1152
1153             vfconv.i[0]     *= 8;
1154             vfconv.i[1]     *= 8;
1155
1156             /* REACTION-FIELD ELECTROSTATICS */
1157             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1158
1159             /* CUBIC SPLINE TABLE DISPERSION */
1160             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1161             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1162             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1163             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1164             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1165             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1166             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1167             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1168             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1169
1170             /* CUBIC SPLINE TABLE REPULSION */
1171             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1172             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1173             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1174             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1175             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1176             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1177             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1178             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1179             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1180             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1181
1182             fscal            = _fjsp_add_v2r8(felec,fvdw);
1183
1184             /* Update vectorial force */
1185             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1186             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1187             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1188             
1189             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1190             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1191             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1192
1193             /**************************
1194              * CALCULATE INTERACTIONS *
1195              **************************/
1196
1197             /* REACTION-FIELD ELECTROSTATICS */
1198             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1199
1200             fscal            = felec;
1201
1202             /* Update vectorial force */
1203             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1204             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1205             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1206             
1207             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1208             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1209             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1210
1211             /**************************
1212              * CALCULATE INTERACTIONS *
1213              **************************/
1214
1215             /* REACTION-FIELD ELECTROSTATICS */
1216             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1217
1218             fscal            = felec;
1219
1220             /* Update vectorial force */
1221             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1222             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1223             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1224             
1225             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1226             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1227             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1228
1229             /**************************
1230              * CALCULATE INTERACTIONS *
1231              **************************/
1232
1233             /* REACTION-FIELD ELECTROSTATICS */
1234             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1235
1236             fscal            = felec;
1237
1238             /* Update vectorial force */
1239             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1240             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1241             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1242             
1243             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1244             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1245             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1246
1247             /**************************
1248              * CALCULATE INTERACTIONS *
1249              **************************/
1250
1251             /* REACTION-FIELD ELECTROSTATICS */
1252             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1253
1254             fscal            = felec;
1255
1256             /* Update vectorial force */
1257             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1258             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1259             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1260             
1261             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1262             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1263             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1264
1265             /**************************
1266              * CALCULATE INTERACTIONS *
1267              **************************/
1268
1269             /* REACTION-FIELD ELECTROSTATICS */
1270             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1271
1272             fscal            = felec;
1273
1274             /* Update vectorial force */
1275             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1276             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1277             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1278             
1279             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1280             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1281             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1282
1283             /**************************
1284              * CALCULATE INTERACTIONS *
1285              **************************/
1286
1287             /* REACTION-FIELD ELECTROSTATICS */
1288             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1289
1290             fscal            = felec;
1291
1292             /* Update vectorial force */
1293             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1294             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1295             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1296             
1297             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1298             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1299             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1300
1301             /**************************
1302              * CALCULATE INTERACTIONS *
1303              **************************/
1304
1305             /* REACTION-FIELD ELECTROSTATICS */
1306             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1307
1308             fscal            = felec;
1309
1310             /* Update vectorial force */
1311             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1312             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1313             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1314             
1315             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1316             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1317             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1318
1319             /**************************
1320              * CALCULATE INTERACTIONS *
1321              **************************/
1322
1323             /* REACTION-FIELD ELECTROSTATICS */
1324             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1325
1326             fscal            = felec;
1327
1328             /* Update vectorial force */
1329             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1330             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1331             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1332             
1333             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1334             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1335             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1336
1337             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1338
1339             /* Inner loop uses 297 flops */
1340         }
1341
1342         if(jidx<j_index_end)
1343         {
1344
1345             jnrA             = jjnr[jidx];
1346             j_coord_offsetA  = DIM*jnrA;
1347
1348             /* load j atom coordinates */
1349             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1350                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1351
1352             /* Calculate displacement vector */
1353             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1354             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1355             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1356             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1357             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1358             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1359             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1360             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1361             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1362             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1363             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1364             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1365             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1366             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1367             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1368             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1369             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1370             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1371             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1372             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1373             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1374             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1375             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1376             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1377             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1378             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1379             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1380
1381             /* Calculate squared distance and things based on it */
1382             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1383             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1384             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1385             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1386             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1387             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1388             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1389             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1390             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1391
1392             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1393             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1394             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1395             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1396             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1397             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1398             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1399             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1400             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1401
1402             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1403             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1404             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1405             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1406             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1407             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1408             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1409             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1410             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1411
1412             fjx0             = _fjsp_setzero_v2r8();
1413             fjy0             = _fjsp_setzero_v2r8();
1414             fjz0             = _fjsp_setzero_v2r8();
1415             fjx1             = _fjsp_setzero_v2r8();
1416             fjy1             = _fjsp_setzero_v2r8();
1417             fjz1             = _fjsp_setzero_v2r8();
1418             fjx2             = _fjsp_setzero_v2r8();
1419             fjy2             = _fjsp_setzero_v2r8();
1420             fjz2             = _fjsp_setzero_v2r8();
1421
1422             /**************************
1423              * CALCULATE INTERACTIONS *
1424              **************************/
1425
1426             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1427
1428             /* Calculate table index by multiplying r by the table scale and truncating to integer */
1429             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1430             itab_tmp         = _fjsp_dtox_v2r8(rt);
1431             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1432             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1433             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1434
1435             vfconv.i[0]     *= 8;
1436             vfconv.i[1]     *= 8;
1437
1438             /* REACTION-FIELD ELECTROSTATICS */
1439             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1440
1441             /* CUBIC SPLINE TABLE DISPERSION */
1442             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1443             F                = _fjsp_setzero_v2r8();
1444             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1445             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1446             H                = _fjsp_setzero_v2r8();
1447             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1448             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1449             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1450             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1451
1452             /* CUBIC SPLINE TABLE REPULSION */
1453             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1454             F                = _fjsp_setzero_v2r8();
1455             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1456             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1457             H                = _fjsp_setzero_v2r8();
1458             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1459             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1460             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1461             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1462             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1463
1464             fscal            = _fjsp_add_v2r8(felec,fvdw);
1465
1466             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1467
1468             /* Update vectorial force */
1469             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1470             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1471             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1472             
1473             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1474             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1475             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1476
1477             /**************************
1478              * CALCULATE INTERACTIONS *
1479              **************************/
1480
1481             /* REACTION-FIELD ELECTROSTATICS */
1482             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1483
1484             fscal            = felec;
1485
1486             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1487
1488             /* Update vectorial force */
1489             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1490             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1491             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1492             
1493             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1494             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1495             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1496
1497             /**************************
1498              * CALCULATE INTERACTIONS *
1499              **************************/
1500
1501             /* REACTION-FIELD ELECTROSTATICS */
1502             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1503
1504             fscal            = felec;
1505
1506             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1507
1508             /* Update vectorial force */
1509             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1510             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1511             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1512             
1513             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1514             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1515             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1516
1517             /**************************
1518              * CALCULATE INTERACTIONS *
1519              **************************/
1520
1521             /* REACTION-FIELD ELECTROSTATICS */
1522             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1523
1524             fscal            = felec;
1525
1526             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1527
1528             /* Update vectorial force */
1529             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1530             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1531             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1532             
1533             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1534             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1535             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1536
1537             /**************************
1538              * CALCULATE INTERACTIONS *
1539              **************************/
1540
1541             /* REACTION-FIELD ELECTROSTATICS */
1542             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1543
1544             fscal            = felec;
1545
1546             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1547
1548             /* Update vectorial force */
1549             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1550             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1551             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1552             
1553             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1554             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1555             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1556
1557             /**************************
1558              * CALCULATE INTERACTIONS *
1559              **************************/
1560
1561             /* REACTION-FIELD ELECTROSTATICS */
1562             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1563
1564             fscal            = felec;
1565
1566             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1567
1568             /* Update vectorial force */
1569             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1570             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1571             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1572             
1573             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1574             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1575             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1576
1577             /**************************
1578              * CALCULATE INTERACTIONS *
1579              **************************/
1580
1581             /* REACTION-FIELD ELECTROSTATICS */
1582             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1583
1584             fscal            = felec;
1585
1586             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1587
1588             /* Update vectorial force */
1589             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1590             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1591             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1592             
1593             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1594             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1595             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1596
1597             /**************************
1598              * CALCULATE INTERACTIONS *
1599              **************************/
1600
1601             /* REACTION-FIELD ELECTROSTATICS */
1602             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1603
1604             fscal            = felec;
1605
1606             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1607
1608             /* Update vectorial force */
1609             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1610             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1611             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1612             
1613             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1614             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1615             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1616
1617             /**************************
1618              * CALCULATE INTERACTIONS *
1619              **************************/
1620
1621             /* REACTION-FIELD ELECTROSTATICS */
1622             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1623
1624             fscal            = felec;
1625
1626             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1627
1628             /* Update vectorial force */
1629             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1630             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1631             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1632             
1633             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1634             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1635             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1636
1637             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1638
1639             /* Inner loop uses 297 flops */
1640         }
1641
1642         /* End of innermost loop */
1643
1644         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1645                                               f+i_coord_offset,fshift+i_shift_offset);
1646
1647         /* Increment number of inner iterations */
1648         inneriter                  += j_index_end - j_index_start;
1649
1650         /* Outer loop uses 18 flops */
1651     }
1652
1653     /* Increment number of outer iterations */
1654     outeriter        += nri;
1655
1656     /* Update outer/inner flops */
1657
1658     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*297);
1659 }