4476e68a68dab3135daf76afea57f4e8633ef334
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: ReactionField
52  * VdW interaction:            CubicSplineTable
53  * Geometry:                   Water4-Water4
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwioffset3;
86     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     int              vdwjidx3A,vdwjidx3B;
94     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
95     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
96     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
97     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
98     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
99     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
100     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
101     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
102     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
103     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
104     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
105     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
106     real             *charge;
107     int              nvdwtype;
108     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
109     int              *vdwtype;
110     real             *vdwparam;
111     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
112     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
113     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
114     real             *vftab;
115     _fjsp_v2r8       itab_tmp;
116     _fjsp_v2r8       dummy_mask,cutoff_mask;
117     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
118     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
119     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
120
121     x                = xx[0];
122     f                = ff[0];
123
124     nri              = nlist->nri;
125     iinr             = nlist->iinr;
126     jindex           = nlist->jindex;
127     jjnr             = nlist->jjnr;
128     shiftidx         = nlist->shift;
129     gid              = nlist->gid;
130     shiftvec         = fr->shift_vec[0];
131     fshift           = fr->fshift[0];
132     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
133     charge           = mdatoms->chargeA;
134     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
135     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
136     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
137     nvdwtype         = fr->ntype;
138     vdwparam         = fr->nbfp;
139     vdwtype          = mdatoms->typeA;
140
141     vftab            = kernel_data->table_vdw->data;
142     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
143
144     /* Setup water-specific parameters */
145     inr              = nlist->iinr[0];
146     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
147     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
148     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
149     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
150
151     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
152     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
153     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
154     vdwjidx0A        = 2*vdwtype[inr+0];
155     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
156     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
157     qq11             = _fjsp_mul_v2r8(iq1,jq1);
158     qq12             = _fjsp_mul_v2r8(iq1,jq2);
159     qq13             = _fjsp_mul_v2r8(iq1,jq3);
160     qq21             = _fjsp_mul_v2r8(iq2,jq1);
161     qq22             = _fjsp_mul_v2r8(iq2,jq2);
162     qq23             = _fjsp_mul_v2r8(iq2,jq3);
163     qq31             = _fjsp_mul_v2r8(iq3,jq1);
164     qq32             = _fjsp_mul_v2r8(iq3,jq2);
165     qq33             = _fjsp_mul_v2r8(iq3,jq3);
166
167     /* Avoid stupid compiler warnings */
168     jnrA = jnrB = 0;
169     j_coord_offsetA = 0;
170     j_coord_offsetB = 0;
171
172     outeriter        = 0;
173     inneriter        = 0;
174
175     /* Start outer loop over neighborlists */
176     for(iidx=0; iidx<nri; iidx++)
177     {
178         /* Load shift vector for this list */
179         i_shift_offset   = DIM*shiftidx[iidx];
180
181         /* Load limits for loop over neighbors */
182         j_index_start    = jindex[iidx];
183         j_index_end      = jindex[iidx+1];
184
185         /* Get outer coordinate index */
186         inr              = iinr[iidx];
187         i_coord_offset   = DIM*inr;
188
189         /* Load i particle coords and add shift vector */
190         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
191                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
192
193         fix0             = _fjsp_setzero_v2r8();
194         fiy0             = _fjsp_setzero_v2r8();
195         fiz0             = _fjsp_setzero_v2r8();
196         fix1             = _fjsp_setzero_v2r8();
197         fiy1             = _fjsp_setzero_v2r8();
198         fiz1             = _fjsp_setzero_v2r8();
199         fix2             = _fjsp_setzero_v2r8();
200         fiy2             = _fjsp_setzero_v2r8();
201         fiz2             = _fjsp_setzero_v2r8();
202         fix3             = _fjsp_setzero_v2r8();
203         fiy3             = _fjsp_setzero_v2r8();
204         fiz3             = _fjsp_setzero_v2r8();
205
206         /* Reset potential sums */
207         velecsum         = _fjsp_setzero_v2r8();
208         vvdwsum          = _fjsp_setzero_v2r8();
209
210         /* Start inner kernel loop */
211         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
212         {
213
214             /* Get j neighbor index, and coordinate index */
215             jnrA             = jjnr[jidx];
216             jnrB             = jjnr[jidx+1];
217             j_coord_offsetA  = DIM*jnrA;
218             j_coord_offsetB  = DIM*jnrB;
219
220             /* load j atom coordinates */
221             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
222                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
223                                               &jy2,&jz2,&jx3,&jy3,&jz3);
224
225             /* Calculate displacement vector */
226             dx00             = _fjsp_sub_v2r8(ix0,jx0);
227             dy00             = _fjsp_sub_v2r8(iy0,jy0);
228             dz00             = _fjsp_sub_v2r8(iz0,jz0);
229             dx11             = _fjsp_sub_v2r8(ix1,jx1);
230             dy11             = _fjsp_sub_v2r8(iy1,jy1);
231             dz11             = _fjsp_sub_v2r8(iz1,jz1);
232             dx12             = _fjsp_sub_v2r8(ix1,jx2);
233             dy12             = _fjsp_sub_v2r8(iy1,jy2);
234             dz12             = _fjsp_sub_v2r8(iz1,jz2);
235             dx13             = _fjsp_sub_v2r8(ix1,jx3);
236             dy13             = _fjsp_sub_v2r8(iy1,jy3);
237             dz13             = _fjsp_sub_v2r8(iz1,jz3);
238             dx21             = _fjsp_sub_v2r8(ix2,jx1);
239             dy21             = _fjsp_sub_v2r8(iy2,jy1);
240             dz21             = _fjsp_sub_v2r8(iz2,jz1);
241             dx22             = _fjsp_sub_v2r8(ix2,jx2);
242             dy22             = _fjsp_sub_v2r8(iy2,jy2);
243             dz22             = _fjsp_sub_v2r8(iz2,jz2);
244             dx23             = _fjsp_sub_v2r8(ix2,jx3);
245             dy23             = _fjsp_sub_v2r8(iy2,jy3);
246             dz23             = _fjsp_sub_v2r8(iz2,jz3);
247             dx31             = _fjsp_sub_v2r8(ix3,jx1);
248             dy31             = _fjsp_sub_v2r8(iy3,jy1);
249             dz31             = _fjsp_sub_v2r8(iz3,jz1);
250             dx32             = _fjsp_sub_v2r8(ix3,jx2);
251             dy32             = _fjsp_sub_v2r8(iy3,jy2);
252             dz32             = _fjsp_sub_v2r8(iz3,jz2);
253             dx33             = _fjsp_sub_v2r8(ix3,jx3);
254             dy33             = _fjsp_sub_v2r8(iy3,jy3);
255             dz33             = _fjsp_sub_v2r8(iz3,jz3);
256
257             /* Calculate squared distance and things based on it */
258             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
259             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
260             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
261             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
262             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
263             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
264             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
265             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
266             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
267             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
268
269             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
270             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
271             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
272             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
273             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
274             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
275             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
276             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
277             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
278             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
279
280             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
281             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
282             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
283             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
284             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
285             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
286             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
287             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
288             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
289
290             fjx0             = _fjsp_setzero_v2r8();
291             fjy0             = _fjsp_setzero_v2r8();
292             fjz0             = _fjsp_setzero_v2r8();
293             fjx1             = _fjsp_setzero_v2r8();
294             fjy1             = _fjsp_setzero_v2r8();
295             fjz1             = _fjsp_setzero_v2r8();
296             fjx2             = _fjsp_setzero_v2r8();
297             fjy2             = _fjsp_setzero_v2r8();
298             fjz2             = _fjsp_setzero_v2r8();
299             fjx3             = _fjsp_setzero_v2r8();
300             fjy3             = _fjsp_setzero_v2r8();
301             fjz3             = _fjsp_setzero_v2r8();
302
303             /**************************
304              * CALCULATE INTERACTIONS *
305              **************************/
306
307             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
308
309             /* Calculate table index by multiplying r with table scale and truncate to integer */
310             rt               = _fjsp_mul_v2r8(r00,vftabscale);
311             itab_tmp         = _fjsp_dtox_v2r8(rt);
312             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
313             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
314             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
315
316             vfconv.i[0]     *= 8;
317             vfconv.i[1]     *= 8;
318
319             /* CUBIC SPLINE TABLE DISPERSION */
320             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
321             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
322             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
323             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
324             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
325             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
326             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
327             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
328             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
329             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
330             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
331
332             /* CUBIC SPLINE TABLE REPULSION */
333             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
334             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
335             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
336             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
337             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
338             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
339             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
340             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
341             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
342             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
343             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
344             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
345             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
346
347             /* Update potential sum for this i atom from the interaction with this j atom. */
348             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
349
350             fscal            = fvdw;
351
352             /* Update vectorial force */
353             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
354             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
355             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
356             
357             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
358             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
359             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
360
361             /**************************
362              * CALCULATE INTERACTIONS *
363              **************************/
364
365             /* REACTION-FIELD ELECTROSTATICS */
366             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
367             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
368
369             /* Update potential sum for this i atom from the interaction with this j atom. */
370             velecsum         = _fjsp_add_v2r8(velecsum,velec);
371
372             fscal            = felec;
373
374             /* Update vectorial force */
375             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
376             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
377             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
378             
379             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
380             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
381             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
382
383             /**************************
384              * CALCULATE INTERACTIONS *
385              **************************/
386
387             /* REACTION-FIELD ELECTROSTATICS */
388             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
389             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
390
391             /* Update potential sum for this i atom from the interaction with this j atom. */
392             velecsum         = _fjsp_add_v2r8(velecsum,velec);
393
394             fscal            = felec;
395
396             /* Update vectorial force */
397             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
398             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
399             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
400             
401             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
402             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
403             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
404
405             /**************************
406              * CALCULATE INTERACTIONS *
407              **************************/
408
409             /* REACTION-FIELD ELECTROSTATICS */
410             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
411             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
412
413             /* Update potential sum for this i atom from the interaction with this j atom. */
414             velecsum         = _fjsp_add_v2r8(velecsum,velec);
415
416             fscal            = felec;
417
418             /* Update vectorial force */
419             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
420             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
421             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
422             
423             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
424             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
425             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
426
427             /**************************
428              * CALCULATE INTERACTIONS *
429              **************************/
430
431             /* REACTION-FIELD ELECTROSTATICS */
432             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
433             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
434
435             /* Update potential sum for this i atom from the interaction with this j atom. */
436             velecsum         = _fjsp_add_v2r8(velecsum,velec);
437
438             fscal            = felec;
439
440             /* Update vectorial force */
441             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
442             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
443             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
444             
445             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
446             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
447             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
448
449             /**************************
450              * CALCULATE INTERACTIONS *
451              **************************/
452
453             /* REACTION-FIELD ELECTROSTATICS */
454             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
455             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
456
457             /* Update potential sum for this i atom from the interaction with this j atom. */
458             velecsum         = _fjsp_add_v2r8(velecsum,velec);
459
460             fscal            = felec;
461
462             /* Update vectorial force */
463             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
464             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
465             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
466             
467             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
468             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
469             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
470
471             /**************************
472              * CALCULATE INTERACTIONS *
473              **************************/
474
475             /* REACTION-FIELD ELECTROSTATICS */
476             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
477             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
478
479             /* Update potential sum for this i atom from the interaction with this j atom. */
480             velecsum         = _fjsp_add_v2r8(velecsum,velec);
481
482             fscal            = felec;
483
484             /* Update vectorial force */
485             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
486             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
487             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
488             
489             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
490             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
491             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
492
493             /**************************
494              * CALCULATE INTERACTIONS *
495              **************************/
496
497             /* REACTION-FIELD ELECTROSTATICS */
498             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
499             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
500
501             /* Update potential sum for this i atom from the interaction with this j atom. */
502             velecsum         = _fjsp_add_v2r8(velecsum,velec);
503
504             fscal            = felec;
505
506             /* Update vectorial force */
507             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
508             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
509             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
510             
511             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
512             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
513             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
514
515             /**************************
516              * CALCULATE INTERACTIONS *
517              **************************/
518
519             /* REACTION-FIELD ELECTROSTATICS */
520             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
521             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
522
523             /* Update potential sum for this i atom from the interaction with this j atom. */
524             velecsum         = _fjsp_add_v2r8(velecsum,velec);
525
526             fscal            = felec;
527
528             /* Update vectorial force */
529             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
530             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
531             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
532             
533             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
534             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
535             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
536
537             /**************************
538              * CALCULATE INTERACTIONS *
539              **************************/
540
541             /* REACTION-FIELD ELECTROSTATICS */
542             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
543             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
544
545             /* Update potential sum for this i atom from the interaction with this j atom. */
546             velecsum         = _fjsp_add_v2r8(velecsum,velec);
547
548             fscal            = felec;
549
550             /* Update vectorial force */
551             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
552             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
553             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
554             
555             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
556             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
557             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
558
559             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
560
561             /* Inner loop uses 377 flops */
562         }
563
564         if(jidx<j_index_end)
565         {
566
567             jnrA             = jjnr[jidx];
568             j_coord_offsetA  = DIM*jnrA;
569
570             /* load j atom coordinates */
571             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
572                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
573                                               &jy2,&jz2,&jx3,&jy3,&jz3);
574
575             /* Calculate displacement vector */
576             dx00             = _fjsp_sub_v2r8(ix0,jx0);
577             dy00             = _fjsp_sub_v2r8(iy0,jy0);
578             dz00             = _fjsp_sub_v2r8(iz0,jz0);
579             dx11             = _fjsp_sub_v2r8(ix1,jx1);
580             dy11             = _fjsp_sub_v2r8(iy1,jy1);
581             dz11             = _fjsp_sub_v2r8(iz1,jz1);
582             dx12             = _fjsp_sub_v2r8(ix1,jx2);
583             dy12             = _fjsp_sub_v2r8(iy1,jy2);
584             dz12             = _fjsp_sub_v2r8(iz1,jz2);
585             dx13             = _fjsp_sub_v2r8(ix1,jx3);
586             dy13             = _fjsp_sub_v2r8(iy1,jy3);
587             dz13             = _fjsp_sub_v2r8(iz1,jz3);
588             dx21             = _fjsp_sub_v2r8(ix2,jx1);
589             dy21             = _fjsp_sub_v2r8(iy2,jy1);
590             dz21             = _fjsp_sub_v2r8(iz2,jz1);
591             dx22             = _fjsp_sub_v2r8(ix2,jx2);
592             dy22             = _fjsp_sub_v2r8(iy2,jy2);
593             dz22             = _fjsp_sub_v2r8(iz2,jz2);
594             dx23             = _fjsp_sub_v2r8(ix2,jx3);
595             dy23             = _fjsp_sub_v2r8(iy2,jy3);
596             dz23             = _fjsp_sub_v2r8(iz2,jz3);
597             dx31             = _fjsp_sub_v2r8(ix3,jx1);
598             dy31             = _fjsp_sub_v2r8(iy3,jy1);
599             dz31             = _fjsp_sub_v2r8(iz3,jz1);
600             dx32             = _fjsp_sub_v2r8(ix3,jx2);
601             dy32             = _fjsp_sub_v2r8(iy3,jy2);
602             dz32             = _fjsp_sub_v2r8(iz3,jz2);
603             dx33             = _fjsp_sub_v2r8(ix3,jx3);
604             dy33             = _fjsp_sub_v2r8(iy3,jy3);
605             dz33             = _fjsp_sub_v2r8(iz3,jz3);
606
607             /* Calculate squared distance and things based on it */
608             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
609             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
610             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
611             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
612             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
613             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
614             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
615             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
616             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
617             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
618
619             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
620             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
621             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
622             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
623             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
624             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
625             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
626             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
627             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
628             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
629
630             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
631             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
632             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
633             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
634             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
635             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
636             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
637             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
638             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
639
640             fjx0             = _fjsp_setzero_v2r8();
641             fjy0             = _fjsp_setzero_v2r8();
642             fjz0             = _fjsp_setzero_v2r8();
643             fjx1             = _fjsp_setzero_v2r8();
644             fjy1             = _fjsp_setzero_v2r8();
645             fjz1             = _fjsp_setzero_v2r8();
646             fjx2             = _fjsp_setzero_v2r8();
647             fjy2             = _fjsp_setzero_v2r8();
648             fjz2             = _fjsp_setzero_v2r8();
649             fjx3             = _fjsp_setzero_v2r8();
650             fjy3             = _fjsp_setzero_v2r8();
651             fjz3             = _fjsp_setzero_v2r8();
652
653             /**************************
654              * CALCULATE INTERACTIONS *
655              **************************/
656
657             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
658
659             /* Calculate table index by multiplying r with table scale and truncate to integer */
660             rt               = _fjsp_mul_v2r8(r00,vftabscale);
661             itab_tmp         = _fjsp_dtox_v2r8(rt);
662             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
663             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
664             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
665
666             vfconv.i[0]     *= 8;
667             vfconv.i[1]     *= 8;
668
669             /* CUBIC SPLINE TABLE DISPERSION */
670             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
671             F                = _fjsp_setzero_v2r8();
672             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
673             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
674             H                = _fjsp_setzero_v2r8();
675             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
676             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
677             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
678             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
679             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
680             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
681
682             /* CUBIC SPLINE TABLE REPULSION */
683             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
684             F                = _fjsp_setzero_v2r8();
685             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
686             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
687             H                = _fjsp_setzero_v2r8();
688             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
689             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
690             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
691             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
692             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
693             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
694             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
695             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
696
697             /* Update potential sum for this i atom from the interaction with this j atom. */
698             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
699             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
700
701             fscal            = fvdw;
702
703             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
704
705             /* Update vectorial force */
706             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
707             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
708             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
709             
710             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
711             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
712             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
713
714             /**************************
715              * CALCULATE INTERACTIONS *
716              **************************/
717
718             /* REACTION-FIELD ELECTROSTATICS */
719             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
720             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
721
722             /* Update potential sum for this i atom from the interaction with this j atom. */
723             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
724             velecsum         = _fjsp_add_v2r8(velecsum,velec);
725
726             fscal            = felec;
727
728             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
729
730             /* Update vectorial force */
731             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
732             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
733             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
734             
735             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
736             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
737             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
738
739             /**************************
740              * CALCULATE INTERACTIONS *
741              **************************/
742
743             /* REACTION-FIELD ELECTROSTATICS */
744             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
745             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
746
747             /* Update potential sum for this i atom from the interaction with this j atom. */
748             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
749             velecsum         = _fjsp_add_v2r8(velecsum,velec);
750
751             fscal            = felec;
752
753             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
754
755             /* Update vectorial force */
756             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
757             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
758             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
759             
760             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
761             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
762             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
763
764             /**************************
765              * CALCULATE INTERACTIONS *
766              **************************/
767
768             /* REACTION-FIELD ELECTROSTATICS */
769             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
770             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
771
772             /* Update potential sum for this i atom from the interaction with this j atom. */
773             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
774             velecsum         = _fjsp_add_v2r8(velecsum,velec);
775
776             fscal            = felec;
777
778             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
779
780             /* Update vectorial force */
781             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
782             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
783             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
784             
785             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
786             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
787             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
788
789             /**************************
790              * CALCULATE INTERACTIONS *
791              **************************/
792
793             /* REACTION-FIELD ELECTROSTATICS */
794             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
795             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
796
797             /* Update potential sum for this i atom from the interaction with this j atom. */
798             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
799             velecsum         = _fjsp_add_v2r8(velecsum,velec);
800
801             fscal            = felec;
802
803             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
804
805             /* Update vectorial force */
806             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
807             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
808             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
809             
810             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
811             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
812             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
813
814             /**************************
815              * CALCULATE INTERACTIONS *
816              **************************/
817
818             /* REACTION-FIELD ELECTROSTATICS */
819             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
820             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
821
822             /* Update potential sum for this i atom from the interaction with this j atom. */
823             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
824             velecsum         = _fjsp_add_v2r8(velecsum,velec);
825
826             fscal            = felec;
827
828             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
829
830             /* Update vectorial force */
831             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
832             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
833             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
834             
835             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
836             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
837             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
838
839             /**************************
840              * CALCULATE INTERACTIONS *
841              **************************/
842
843             /* REACTION-FIELD ELECTROSTATICS */
844             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
845             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
846
847             /* Update potential sum for this i atom from the interaction with this j atom. */
848             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
849             velecsum         = _fjsp_add_v2r8(velecsum,velec);
850
851             fscal            = felec;
852
853             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
854
855             /* Update vectorial force */
856             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
857             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
858             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
859             
860             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
861             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
862             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
863
864             /**************************
865              * CALCULATE INTERACTIONS *
866              **************************/
867
868             /* REACTION-FIELD ELECTROSTATICS */
869             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
870             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
871
872             /* Update potential sum for this i atom from the interaction with this j atom. */
873             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
874             velecsum         = _fjsp_add_v2r8(velecsum,velec);
875
876             fscal            = felec;
877
878             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
879
880             /* Update vectorial force */
881             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
882             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
883             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
884             
885             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
886             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
887             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
888
889             /**************************
890              * CALCULATE INTERACTIONS *
891              **************************/
892
893             /* REACTION-FIELD ELECTROSTATICS */
894             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
895             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
896
897             /* Update potential sum for this i atom from the interaction with this j atom. */
898             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
899             velecsum         = _fjsp_add_v2r8(velecsum,velec);
900
901             fscal            = felec;
902
903             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
904
905             /* Update vectorial force */
906             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
907             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
908             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
909             
910             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
911             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
912             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
913
914             /**************************
915              * CALCULATE INTERACTIONS *
916              **************************/
917
918             /* REACTION-FIELD ELECTROSTATICS */
919             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
920             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
921
922             /* Update potential sum for this i atom from the interaction with this j atom. */
923             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
924             velecsum         = _fjsp_add_v2r8(velecsum,velec);
925
926             fscal            = felec;
927
928             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
929
930             /* Update vectorial force */
931             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
932             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
933             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
934             
935             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
936             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
937             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
938
939             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
940
941             /* Inner loop uses 377 flops */
942         }
943
944         /* End of innermost loop */
945
946         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
947                                               f+i_coord_offset,fshift+i_shift_offset);
948
949         ggid                        = gid[iidx];
950         /* Update potential energies */
951         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
952         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
953
954         /* Increment number of inner iterations */
955         inneriter                  += j_index_end - j_index_start;
956
957         /* Outer loop uses 26 flops */
958     }
959
960     /* Increment number of outer iterations */
961     outeriter        += nri;
962
963     /* Update outer/inner flops */
964
965     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*377);
966 }
967 /*
968  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
969  * Electrostatics interaction: ReactionField
970  * VdW interaction:            CubicSplineTable
971  * Geometry:                   Water4-Water4
972  * Calculate force/pot:        Force
973  */
974 void
975 nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
976                     (t_nblist                    * gmx_restrict       nlist,
977                      rvec                        * gmx_restrict          xx,
978                      rvec                        * gmx_restrict          ff,
979                      t_forcerec                  * gmx_restrict          fr,
980                      t_mdatoms                   * gmx_restrict     mdatoms,
981                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
982                      t_nrnb                      * gmx_restrict        nrnb)
983 {
984     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
985      * just 0 for non-waters.
986      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
987      * jnr indices corresponding to data put in the two positions in the SIMD register.
988      */
989     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
990     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
991     int              jnrA,jnrB;
992     int              j_coord_offsetA,j_coord_offsetB;
993     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
994     real             rcutoff_scalar;
995     real             *shiftvec,*fshift,*x,*f;
996     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
997     int              vdwioffset0;
998     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
999     int              vdwioffset1;
1000     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1001     int              vdwioffset2;
1002     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1003     int              vdwioffset3;
1004     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1005     int              vdwjidx0A,vdwjidx0B;
1006     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1007     int              vdwjidx1A,vdwjidx1B;
1008     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1009     int              vdwjidx2A,vdwjidx2B;
1010     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1011     int              vdwjidx3A,vdwjidx3B;
1012     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1013     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1014     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1015     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1016     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1017     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1018     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1019     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1020     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1021     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1022     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1023     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1024     real             *charge;
1025     int              nvdwtype;
1026     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1027     int              *vdwtype;
1028     real             *vdwparam;
1029     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1030     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1031     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1032     real             *vftab;
1033     _fjsp_v2r8       itab_tmp;
1034     _fjsp_v2r8       dummy_mask,cutoff_mask;
1035     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1036     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1037     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1038
1039     x                = xx[0];
1040     f                = ff[0];
1041
1042     nri              = nlist->nri;
1043     iinr             = nlist->iinr;
1044     jindex           = nlist->jindex;
1045     jjnr             = nlist->jjnr;
1046     shiftidx         = nlist->shift;
1047     gid              = nlist->gid;
1048     shiftvec         = fr->shift_vec[0];
1049     fshift           = fr->fshift[0];
1050     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1051     charge           = mdatoms->chargeA;
1052     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1053     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1054     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1055     nvdwtype         = fr->ntype;
1056     vdwparam         = fr->nbfp;
1057     vdwtype          = mdatoms->typeA;
1058
1059     vftab            = kernel_data->table_vdw->data;
1060     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1061
1062     /* Setup water-specific parameters */
1063     inr              = nlist->iinr[0];
1064     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1065     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1066     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1067     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1068
1069     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1070     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1071     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1072     vdwjidx0A        = 2*vdwtype[inr+0];
1073     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1074     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1075     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1076     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1077     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1078     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1079     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1080     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1081     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1082     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1083     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1084
1085     /* Avoid stupid compiler warnings */
1086     jnrA = jnrB = 0;
1087     j_coord_offsetA = 0;
1088     j_coord_offsetB = 0;
1089
1090     outeriter        = 0;
1091     inneriter        = 0;
1092
1093     /* Start outer loop over neighborlists */
1094     for(iidx=0; iidx<nri; iidx++)
1095     {
1096         /* Load shift vector for this list */
1097         i_shift_offset   = DIM*shiftidx[iidx];
1098
1099         /* Load limits for loop over neighbors */
1100         j_index_start    = jindex[iidx];
1101         j_index_end      = jindex[iidx+1];
1102
1103         /* Get outer coordinate index */
1104         inr              = iinr[iidx];
1105         i_coord_offset   = DIM*inr;
1106
1107         /* Load i particle coords and add shift vector */
1108         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1109                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1110
1111         fix0             = _fjsp_setzero_v2r8();
1112         fiy0             = _fjsp_setzero_v2r8();
1113         fiz0             = _fjsp_setzero_v2r8();
1114         fix1             = _fjsp_setzero_v2r8();
1115         fiy1             = _fjsp_setzero_v2r8();
1116         fiz1             = _fjsp_setzero_v2r8();
1117         fix2             = _fjsp_setzero_v2r8();
1118         fiy2             = _fjsp_setzero_v2r8();
1119         fiz2             = _fjsp_setzero_v2r8();
1120         fix3             = _fjsp_setzero_v2r8();
1121         fiy3             = _fjsp_setzero_v2r8();
1122         fiz3             = _fjsp_setzero_v2r8();
1123
1124         /* Start inner kernel loop */
1125         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1126         {
1127
1128             /* Get j neighbor index, and coordinate index */
1129             jnrA             = jjnr[jidx];
1130             jnrB             = jjnr[jidx+1];
1131             j_coord_offsetA  = DIM*jnrA;
1132             j_coord_offsetB  = DIM*jnrB;
1133
1134             /* load j atom coordinates */
1135             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1136                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1137                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1138
1139             /* Calculate displacement vector */
1140             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1141             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1142             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1143             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1144             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1145             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1146             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1147             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1148             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1149             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1150             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1151             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1152             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1153             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1154             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1155             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1156             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1157             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1158             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1159             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1160             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1161             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1162             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1163             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1164             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1165             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1166             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1167             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1168             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1169             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1170
1171             /* Calculate squared distance and things based on it */
1172             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1173             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1174             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1175             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1176             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1177             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1178             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1179             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1180             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1181             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1182
1183             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1184             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1185             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1186             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1187             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1188             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1189             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1190             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1191             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1192             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1193
1194             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1195             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1196             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1197             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1198             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1199             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1200             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1201             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1202             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1203
1204             fjx0             = _fjsp_setzero_v2r8();
1205             fjy0             = _fjsp_setzero_v2r8();
1206             fjz0             = _fjsp_setzero_v2r8();
1207             fjx1             = _fjsp_setzero_v2r8();
1208             fjy1             = _fjsp_setzero_v2r8();
1209             fjz1             = _fjsp_setzero_v2r8();
1210             fjx2             = _fjsp_setzero_v2r8();
1211             fjy2             = _fjsp_setzero_v2r8();
1212             fjz2             = _fjsp_setzero_v2r8();
1213             fjx3             = _fjsp_setzero_v2r8();
1214             fjy3             = _fjsp_setzero_v2r8();
1215             fjz3             = _fjsp_setzero_v2r8();
1216
1217             /**************************
1218              * CALCULATE INTERACTIONS *
1219              **************************/
1220
1221             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1222
1223             /* Calculate table index by multiplying r with table scale and truncate to integer */
1224             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1225             itab_tmp         = _fjsp_dtox_v2r8(rt);
1226             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1227             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1228             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1229
1230             vfconv.i[0]     *= 8;
1231             vfconv.i[1]     *= 8;
1232
1233             /* CUBIC SPLINE TABLE DISPERSION */
1234             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1235             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1236             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1237             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1238             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1239             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1240             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1241             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1242             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1243
1244             /* CUBIC SPLINE TABLE REPULSION */
1245             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1246             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1247             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1248             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1249             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1250             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1251             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1252             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1253             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1254             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1255
1256             fscal            = fvdw;
1257
1258             /* Update vectorial force */
1259             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1260             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1261             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1262             
1263             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1264             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1265             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1266
1267             /**************************
1268              * CALCULATE INTERACTIONS *
1269              **************************/
1270
1271             /* REACTION-FIELD ELECTROSTATICS */
1272             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1273
1274             fscal            = felec;
1275
1276             /* Update vectorial force */
1277             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1278             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1279             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1280             
1281             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1282             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1283             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1284
1285             /**************************
1286              * CALCULATE INTERACTIONS *
1287              **************************/
1288
1289             /* REACTION-FIELD ELECTROSTATICS */
1290             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1291
1292             fscal            = felec;
1293
1294             /* Update vectorial force */
1295             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1296             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1297             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1298             
1299             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1300             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1301             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1302
1303             /**************************
1304              * CALCULATE INTERACTIONS *
1305              **************************/
1306
1307             /* REACTION-FIELD ELECTROSTATICS */
1308             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1309
1310             fscal            = felec;
1311
1312             /* Update vectorial force */
1313             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1314             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1315             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1316             
1317             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1318             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1319             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1320
1321             /**************************
1322              * CALCULATE INTERACTIONS *
1323              **************************/
1324
1325             /* REACTION-FIELD ELECTROSTATICS */
1326             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1327
1328             fscal            = felec;
1329
1330             /* Update vectorial force */
1331             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1332             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1333             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1334             
1335             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1336             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1337             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1338
1339             /**************************
1340              * CALCULATE INTERACTIONS *
1341              **************************/
1342
1343             /* REACTION-FIELD ELECTROSTATICS */
1344             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1345
1346             fscal            = felec;
1347
1348             /* Update vectorial force */
1349             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1350             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1351             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1352             
1353             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1354             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1355             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1356
1357             /**************************
1358              * CALCULATE INTERACTIONS *
1359              **************************/
1360
1361             /* REACTION-FIELD ELECTROSTATICS */
1362             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1363
1364             fscal            = felec;
1365
1366             /* Update vectorial force */
1367             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1368             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1369             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1370             
1371             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1372             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1373             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1374
1375             /**************************
1376              * CALCULATE INTERACTIONS *
1377              **************************/
1378
1379             /* REACTION-FIELD ELECTROSTATICS */
1380             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1381
1382             fscal            = felec;
1383
1384             /* Update vectorial force */
1385             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1386             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1387             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1388             
1389             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1390             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1391             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1392
1393             /**************************
1394              * CALCULATE INTERACTIONS *
1395              **************************/
1396
1397             /* REACTION-FIELD ELECTROSTATICS */
1398             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1399
1400             fscal            = felec;
1401
1402             /* Update vectorial force */
1403             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1404             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1405             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1406             
1407             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1408             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1409             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1410
1411             /**************************
1412              * CALCULATE INTERACTIONS *
1413              **************************/
1414
1415             /* REACTION-FIELD ELECTROSTATICS */
1416             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1417
1418             fscal            = felec;
1419
1420             /* Update vectorial force */
1421             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1422             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1423             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1424             
1425             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1426             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1427             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1428
1429             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1430
1431             /* Inner loop uses 324 flops */
1432         }
1433
1434         if(jidx<j_index_end)
1435         {
1436
1437             jnrA             = jjnr[jidx];
1438             j_coord_offsetA  = DIM*jnrA;
1439
1440             /* load j atom coordinates */
1441             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1442                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1443                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1444
1445             /* Calculate displacement vector */
1446             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1447             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1448             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1449             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1450             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1451             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1452             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1453             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1454             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1455             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1456             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1457             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1458             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1459             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1460             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1461             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1462             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1463             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1464             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1465             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1466             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1467             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1468             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1469             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1470             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1471             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1472             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1473             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1474             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1475             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1476
1477             /* Calculate squared distance and things based on it */
1478             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1479             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1480             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1481             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1482             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1483             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1484             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1485             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1486             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1487             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1488
1489             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1490             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1491             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1492             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1493             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1494             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1495             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1496             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1497             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1498             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1499
1500             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1501             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1502             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1503             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1504             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1505             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1506             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1507             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1508             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1509
1510             fjx0             = _fjsp_setzero_v2r8();
1511             fjy0             = _fjsp_setzero_v2r8();
1512             fjz0             = _fjsp_setzero_v2r8();
1513             fjx1             = _fjsp_setzero_v2r8();
1514             fjy1             = _fjsp_setzero_v2r8();
1515             fjz1             = _fjsp_setzero_v2r8();
1516             fjx2             = _fjsp_setzero_v2r8();
1517             fjy2             = _fjsp_setzero_v2r8();
1518             fjz2             = _fjsp_setzero_v2r8();
1519             fjx3             = _fjsp_setzero_v2r8();
1520             fjy3             = _fjsp_setzero_v2r8();
1521             fjz3             = _fjsp_setzero_v2r8();
1522
1523             /**************************
1524              * CALCULATE INTERACTIONS *
1525              **************************/
1526
1527             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1528
1529             /* Calculate the table index by multiplying r by the table scale and truncating to an integer */
1530             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1531             itab_tmp         = _fjsp_dtox_v2r8(rt);
1532             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1533             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1534             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1535
1536             vfconv.i[0]     *= 8;
1537             vfconv.i[1]     *= 8;
1538
1539             /* CUBIC SPLINE TABLE DISPERSION */
1540             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1541             F                = _fjsp_setzero_v2r8();
1542             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1543             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1544             H                = _fjsp_setzero_v2r8();
1545             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1546             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1547             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1548             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1549
1550             /* CUBIC SPLINE TABLE REPULSION */
1551             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1552             F                = _fjsp_setzero_v2r8();
1553             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1554             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1555             H                = _fjsp_setzero_v2r8();
1556             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1557             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1558             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1559             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1560             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1561
1562             fscal            = fvdw;
1563
1564             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1565
1566             /* Update vectorial force */
1567             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1568             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1569             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1570             
1571             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1572             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1573             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1574
1575             /**************************
1576              * CALCULATE INTERACTIONS *
1577              **************************/
1578
1579             /* REACTION-FIELD ELECTROSTATICS */
1580             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1581
1582             fscal            = felec;
1583
1584             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1585
1586             /* Update vectorial force */
1587             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1588             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1589             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1590             
1591             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1592             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1593             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1594
1595             /**************************
1596              * CALCULATE INTERACTIONS *
1597              **************************/
1598
1599             /* REACTION-FIELD ELECTROSTATICS */
1600             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1601
1602             fscal            = felec;
1603
1604             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1605
1606             /* Update vectorial force */
1607             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1608             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1609             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1610             
1611             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1612             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1613             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1614
1615             /**************************
1616              * CALCULATE INTERACTIONS *
1617              **************************/
1618
1619             /* REACTION-FIELD ELECTROSTATICS */
1620             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1621
1622             fscal            = felec;
1623
1624             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1625
1626             /* Update vectorial force */
1627             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1628             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1629             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1630             
1631             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1632             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1633             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1634
1635             /**************************
1636              * CALCULATE INTERACTIONS *
1637              **************************/
1638
1639             /* REACTION-FIELD ELECTROSTATICS */
1640             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1641
1642             fscal            = felec;
1643
1644             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1645
1646             /* Update vectorial force */
1647             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1648             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1649             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1650             
1651             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1652             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1653             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1654
1655             /**************************
1656              * CALCULATE INTERACTIONS *
1657              **************************/
1658
1659             /* REACTION-FIELD ELECTROSTATICS */
1660             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1661
1662             fscal            = felec;
1663
1664             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1665
1666             /* Update vectorial force */
1667             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1668             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1669             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1670             
1671             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1672             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1673             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1674
1675             /**************************
1676              * CALCULATE INTERACTIONS *
1677              **************************/
1678
1679             /* REACTION-FIELD ELECTROSTATICS */
1680             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1681
1682             fscal            = felec;
1683
1684             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1685
1686             /* Update vectorial force */
1687             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1688             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1689             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1690             
1691             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1692             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1693             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1694
1695             /**************************
1696              * CALCULATE INTERACTIONS *
1697              **************************/
1698
1699             /* REACTION-FIELD ELECTROSTATICS */
1700             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1701
1702             fscal            = felec;
1703
1704             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1705
1706             /* Update vectorial force */
1707             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1708             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1709             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1710             
1711             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1712             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1713             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1714
1715             /**************************
1716              * CALCULATE INTERACTIONS *
1717              **************************/
1718
1719             /* REACTION-FIELD ELECTROSTATICS */
1720             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1721
1722             fscal            = felec;
1723
1724             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1725
1726             /* Update vectorial force */
1727             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1728             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1729             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1730             
1731             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1732             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1733             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1734
1735             /**************************
1736              * CALCULATE INTERACTIONS *
1737              **************************/
1738
1739             /* REACTION-FIELD ELECTROSTATICS */
1740             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1741
1742             fscal            = felec;
1743
1744             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1745
1746             /* Update vectorial force */
1747             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1748             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1749             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1750             
1751             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1752             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1753             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1754
1755             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1756
1757             /* Inner loop uses 324 flops */
1758         }
1759
1760         /* End of innermost loop */
1761
1762         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1763                                               f+i_coord_offset,fshift+i_shift_offset);
1764
1765         /* Increment number of inner iterations */
1766         inneriter                  += j_index_end - j_index_start;
1767
1768         /* Outer loop uses 24 flops */
1769     }
1770
1771     /* Increment number of outer iterations */
1772     outeriter        += nri;
1773
1774     /* Update outer/inner flops */
1775
1776     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*324);
1777 }