e1e6204dbe4c9c025aa0371c121213e583e834f2
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: CubicSplineTable
52  * VdW interaction:            LennardJones
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     int              nvdwtype;
103     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
104     int              *vdwtype;
105     real             *vdwparam;
106     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
107     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
109     real             *vftab;
110     _fjsp_v2r8       itab_tmp;
111     _fjsp_v2r8       dummy_mask,cutoff_mask;
112     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
113     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
114     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115
116     x                = xx[0];
117     f                = ff[0];
118
119     nri              = nlist->nri;
120     iinr             = nlist->iinr;
121     jindex           = nlist->jindex;
122     jjnr             = nlist->jjnr;
123     shiftidx         = nlist->shift;
124     gid              = nlist->gid;
125     shiftvec         = fr->shift_vec[0];
126     fshift           = fr->fshift[0];
127     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
128     charge           = mdatoms->chargeA;
129     nvdwtype         = fr->ntype;
130     vdwparam         = fr->nbfp;
131     vdwtype          = mdatoms->typeA;
132
133     vftab            = kernel_data->table_elec->data;
134     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
135
136     /* Setup water-specific parameters */
137     inr              = nlist->iinr[0];
138     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
139     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
140     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
141     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
142
143     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
144     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
145     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
146     vdwjidx0A        = 2*vdwtype[inr+0];
147     qq00             = _fjsp_mul_v2r8(iq0,jq0);
148     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
149     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
150     qq01             = _fjsp_mul_v2r8(iq0,jq1);
151     qq02             = _fjsp_mul_v2r8(iq0,jq2);
152     qq10             = _fjsp_mul_v2r8(iq1,jq0);
153     qq11             = _fjsp_mul_v2r8(iq1,jq1);
154     qq12             = _fjsp_mul_v2r8(iq1,jq2);
155     qq20             = _fjsp_mul_v2r8(iq2,jq0);
156     qq21             = _fjsp_mul_v2r8(iq2,jq1);
157     qq22             = _fjsp_mul_v2r8(iq2,jq2);
158
159     /* Avoid stupid compiler warnings */
160     jnrA = jnrB = 0;
161     j_coord_offsetA = 0;
162     j_coord_offsetB = 0;
163
164     outeriter        = 0;
165     inneriter        = 0;
166
167     /* Start outer loop over neighborlists */
168     for(iidx=0; iidx<nri; iidx++)
169     {
170         /* Load shift vector for this list */
171         i_shift_offset   = DIM*shiftidx[iidx];
172
173         /* Load limits for loop over neighbors */
174         j_index_start    = jindex[iidx];
175         j_index_end      = jindex[iidx+1];
176
177         /* Get outer coordinate index */
178         inr              = iinr[iidx];
179         i_coord_offset   = DIM*inr;
180
181         /* Load i particle coords and add shift vector */
182         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
183                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
184
185         fix0             = _fjsp_setzero_v2r8();
186         fiy0             = _fjsp_setzero_v2r8();
187         fiz0             = _fjsp_setzero_v2r8();
188         fix1             = _fjsp_setzero_v2r8();
189         fiy1             = _fjsp_setzero_v2r8();
190         fiz1             = _fjsp_setzero_v2r8();
191         fix2             = _fjsp_setzero_v2r8();
192         fiy2             = _fjsp_setzero_v2r8();
193         fiz2             = _fjsp_setzero_v2r8();
194
195         /* Reset potential sums */
196         velecsum         = _fjsp_setzero_v2r8();
197         vvdwsum          = _fjsp_setzero_v2r8();
198
199         /* Start inner kernel loop */
200         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
201         {
202
203             /* Get j neighbor index, and coordinate index */
204             jnrA             = jjnr[jidx];
205             jnrB             = jjnr[jidx+1];
206             j_coord_offsetA  = DIM*jnrA;
207             j_coord_offsetB  = DIM*jnrB;
208
209             /* load j atom coordinates */
210             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
211                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
212
213             /* Calculate displacement vector */
214             dx00             = _fjsp_sub_v2r8(ix0,jx0);
215             dy00             = _fjsp_sub_v2r8(iy0,jy0);
216             dz00             = _fjsp_sub_v2r8(iz0,jz0);
217             dx01             = _fjsp_sub_v2r8(ix0,jx1);
218             dy01             = _fjsp_sub_v2r8(iy0,jy1);
219             dz01             = _fjsp_sub_v2r8(iz0,jz1);
220             dx02             = _fjsp_sub_v2r8(ix0,jx2);
221             dy02             = _fjsp_sub_v2r8(iy0,jy2);
222             dz02             = _fjsp_sub_v2r8(iz0,jz2);
223             dx10             = _fjsp_sub_v2r8(ix1,jx0);
224             dy10             = _fjsp_sub_v2r8(iy1,jy0);
225             dz10             = _fjsp_sub_v2r8(iz1,jz0);
226             dx11             = _fjsp_sub_v2r8(ix1,jx1);
227             dy11             = _fjsp_sub_v2r8(iy1,jy1);
228             dz11             = _fjsp_sub_v2r8(iz1,jz1);
229             dx12             = _fjsp_sub_v2r8(ix1,jx2);
230             dy12             = _fjsp_sub_v2r8(iy1,jy2);
231             dz12             = _fjsp_sub_v2r8(iz1,jz2);
232             dx20             = _fjsp_sub_v2r8(ix2,jx0);
233             dy20             = _fjsp_sub_v2r8(iy2,jy0);
234             dz20             = _fjsp_sub_v2r8(iz2,jz0);
235             dx21             = _fjsp_sub_v2r8(ix2,jx1);
236             dy21             = _fjsp_sub_v2r8(iy2,jy1);
237             dz21             = _fjsp_sub_v2r8(iz2,jz1);
238             dx22             = _fjsp_sub_v2r8(ix2,jx2);
239             dy22             = _fjsp_sub_v2r8(iy2,jy2);
240             dz22             = _fjsp_sub_v2r8(iz2,jz2);
241
242             /* Calculate squared distance and things based on it */
243             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
244             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
245             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
246             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
247             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
248             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
249             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
250             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
251             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
252
253             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
254             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
255             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
256             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
257             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
258             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
259             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
260             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
261             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
262
263             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
264
265             fjx0             = _fjsp_setzero_v2r8();
266             fjy0             = _fjsp_setzero_v2r8();
267             fjz0             = _fjsp_setzero_v2r8();
268             fjx1             = _fjsp_setzero_v2r8();
269             fjy1             = _fjsp_setzero_v2r8();
270             fjz1             = _fjsp_setzero_v2r8();
271             fjx2             = _fjsp_setzero_v2r8();
272             fjy2             = _fjsp_setzero_v2r8();
273             fjz2             = _fjsp_setzero_v2r8();
274
275             /**************************
276              * CALCULATE INTERACTIONS *
277              **************************/
278
279             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
280
281             /* Calculate table index by multiplying r with table scale and truncate to integer */
282             rt               = _fjsp_mul_v2r8(r00,vftabscale);
283             itab_tmp         = _fjsp_dtox_v2r8(rt);
284             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
285             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
286             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
287
288             vfconv.i[0]     *= 4;
289             vfconv.i[1]     *= 4;
290
291             /* CUBIC SPLINE TABLE ELECTROSTATICS */
292             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
293             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
294             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
295             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
296             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
297             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
298             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
299             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
300             velec            = _fjsp_mul_v2r8(qq00,VV);
301             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
302             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
303
304             /* LENNARD-JONES DISPERSION/REPULSION */
305
306             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
307             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
308             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
309             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
310             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
311
312             /* Update potential sum for this i atom from the interaction with this j atom. */
313             velecsum         = _fjsp_add_v2r8(velecsum,velec);
314             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
315
316             fscal            = _fjsp_add_v2r8(felec,fvdw);
317
318             /* Update vectorial force */
319             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
320             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
321             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
322             
323             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
324             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
325             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
326
327             /**************************
328              * CALCULATE INTERACTIONS *
329              **************************/
330
331             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
332
333             /* Calculate table index by multiplying r with table scale and truncate to integer */
334             rt               = _fjsp_mul_v2r8(r01,vftabscale);
335             itab_tmp         = _fjsp_dtox_v2r8(rt);
336             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
337             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
338             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
339
340             vfconv.i[0]     *= 4;
341             vfconv.i[1]     *= 4;
342
343             /* CUBIC SPLINE TABLE ELECTROSTATICS */
344             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
345             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
346             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
347             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
348             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
349             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
350             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
351             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
352             velec            = _fjsp_mul_v2r8(qq01,VV);
353             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
354             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
355
356             /* Update potential sum for this i atom from the interaction with this j atom. */
357             velecsum         = _fjsp_add_v2r8(velecsum,velec);
358
359             fscal            = felec;
360
361             /* Update vectorial force */
362             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
363             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
364             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
365             
366             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
367             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
368             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
369
370             /**************************
371              * CALCULATE INTERACTIONS *
372              **************************/
373
374             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
375
376             /* Calculate table index by multiplying r with table scale and truncate to integer */
377             rt               = _fjsp_mul_v2r8(r02,vftabscale);
378             itab_tmp         = _fjsp_dtox_v2r8(rt);
379             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
380             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
381             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
382
383             vfconv.i[0]     *= 4;
384             vfconv.i[1]     *= 4;
385
386             /* CUBIC SPLINE TABLE ELECTROSTATICS */
387             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
388             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
389             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
390             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
391             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
392             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
393             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
394             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
395             velec            = _fjsp_mul_v2r8(qq02,VV);
396             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
397             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
398
399             /* Update potential sum for this i atom from the interaction with this j atom. */
400             velecsum         = _fjsp_add_v2r8(velecsum,velec);
401
402             fscal            = felec;
403
404             /* Update vectorial force */
405             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
406             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
407             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
408             
409             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
410             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
411             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
412
413             /**************************
414              * CALCULATE INTERACTIONS *
415              **************************/
416
417             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
418
419             /* Calculate table index by multiplying r with table scale and truncate to integer */
420             rt               = _fjsp_mul_v2r8(r10,vftabscale);
421             itab_tmp         = _fjsp_dtox_v2r8(rt);
422             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
423             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
424             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
425
426             vfconv.i[0]     *= 4;
427             vfconv.i[1]     *= 4;
428
429             /* CUBIC SPLINE TABLE ELECTROSTATICS */
430             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
431             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
432             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
433             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
434             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
435             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
436             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
437             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
438             velec            = _fjsp_mul_v2r8(qq10,VV);
439             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
440             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
441
442             /* Update potential sum for this i atom from the interaction with this j atom. */
443             velecsum         = _fjsp_add_v2r8(velecsum,velec);
444
445             fscal            = felec;
446
447             /* Update vectorial force */
448             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
449             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
450             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
451             
452             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
453             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
454             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
455
456             /**************************
457              * CALCULATE INTERACTIONS *
458              **************************/
459
460             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
461
462             /* Calculate table index by multiplying r with table scale and truncate to integer */
463             rt               = _fjsp_mul_v2r8(r11,vftabscale);
464             itab_tmp         = _fjsp_dtox_v2r8(rt);
465             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
466             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
467             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
468
469             vfconv.i[0]     *= 4;
470             vfconv.i[1]     *= 4;
471
472             /* CUBIC SPLINE TABLE ELECTROSTATICS */
473             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
474             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
475             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
476             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
477             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
478             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
479             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
480             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
481             velec            = _fjsp_mul_v2r8(qq11,VV);
482             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
483             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
484
485             /* Update potential sum for this i atom from the interaction with this j atom. */
486             velecsum         = _fjsp_add_v2r8(velecsum,velec);
487
488             fscal            = felec;
489
490             /* Update vectorial force */
491             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
492             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
493             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
494             
495             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
496             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
497             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
498
499             /**************************
500              * CALCULATE INTERACTIONS *
501              **************************/
502
503             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
504
505             /* Calculate table index by multiplying r with table scale and truncate to integer */
506             rt               = _fjsp_mul_v2r8(r12,vftabscale);
507             itab_tmp         = _fjsp_dtox_v2r8(rt);
508             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
509             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
510             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
511
512             vfconv.i[0]     *= 4;
513             vfconv.i[1]     *= 4;
514
515             /* CUBIC SPLINE TABLE ELECTROSTATICS */
516             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
517             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
518             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
519             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
520             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
521             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
522             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
523             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
524             velec            = _fjsp_mul_v2r8(qq12,VV);
525             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
526             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
527
528             /* Update potential sum for this i atom from the interaction with this j atom. */
529             velecsum         = _fjsp_add_v2r8(velecsum,velec);
530
531             fscal            = felec;
532
533             /* Update vectorial force */
534             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
535             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
536             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
537             
538             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
539             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
540             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
541
542             /**************************
543              * CALCULATE INTERACTIONS *
544              **************************/
545
546             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
547
548             /* Calculate table index by multiplying r with table scale and truncate to integer */
549             rt               = _fjsp_mul_v2r8(r20,vftabscale);
550             itab_tmp         = _fjsp_dtox_v2r8(rt);
551             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
552             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
553             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
554
555             vfconv.i[0]     *= 4;
556             vfconv.i[1]     *= 4;
557
558             /* CUBIC SPLINE TABLE ELECTROSTATICS */
559             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
560             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
561             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
562             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
563             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
564             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
565             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
566             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
567             velec            = _fjsp_mul_v2r8(qq20,VV);
568             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
569             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
570
571             /* Update potential sum for this i atom from the interaction with this j atom. */
572             velecsum         = _fjsp_add_v2r8(velecsum,velec);
573
574             fscal            = felec;
575
576             /* Update vectorial force */
577             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
578             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
579             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
580             
581             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
582             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
583             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
584
585             /**************************
586              * CALCULATE INTERACTIONS *
587              **************************/
588
589             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
590
591             /* Calculate table index by multiplying r with table scale and truncate to integer */
592             rt               = _fjsp_mul_v2r8(r21,vftabscale);
593             itab_tmp         = _fjsp_dtox_v2r8(rt);
594             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
595             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
596             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
597
598             vfconv.i[0]     *= 4;
599             vfconv.i[1]     *= 4;
600
601             /* CUBIC SPLINE TABLE ELECTROSTATICS */
602             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
603             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
604             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
605             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
606             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
607             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
608             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
609             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
610             velec            = _fjsp_mul_v2r8(qq21,VV);
611             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
612             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
613
614             /* Update potential sum for this i atom from the interaction with this j atom. */
615             velecsum         = _fjsp_add_v2r8(velecsum,velec);
616
617             fscal            = felec;
618
619             /* Update vectorial force */
620             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
621             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
622             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
623             
624             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
625             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
626             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
627
628             /**************************
629              * CALCULATE INTERACTIONS *
630              **************************/
631
632             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
633
634             /* Calculate table index by multiplying r with table scale and truncate to integer */
635             rt               = _fjsp_mul_v2r8(r22,vftabscale);
636             itab_tmp         = _fjsp_dtox_v2r8(rt);
637             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
638             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
639             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
640
641             vfconv.i[0]     *= 4;
642             vfconv.i[1]     *= 4;
643
644             /* CUBIC SPLINE TABLE ELECTROSTATICS */
645             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
646             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
647             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
648             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
649             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
650             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
651             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
652             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
653             velec            = _fjsp_mul_v2r8(qq22,VV);
654             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
655             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
656
657             /* Update potential sum for this i atom from the interaction with this j atom. */
658             velecsum         = _fjsp_add_v2r8(velecsum,velec);
659
660             fscal            = felec;
661
662             /* Update vectorial force */
663             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
664             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
665             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
666             
667             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
668             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
669             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
670
671             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
672
673             /* Inner loop uses 427 flops */
674         }
675
676         if(jidx<j_index_end)
677         {
678
679             jnrA             = jjnr[jidx];
680             j_coord_offsetA  = DIM*jnrA;
681
682             /* load j atom coordinates */
683             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
684                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
685
686             /* Calculate displacement vector */
687             dx00             = _fjsp_sub_v2r8(ix0,jx0);
688             dy00             = _fjsp_sub_v2r8(iy0,jy0);
689             dz00             = _fjsp_sub_v2r8(iz0,jz0);
690             dx01             = _fjsp_sub_v2r8(ix0,jx1);
691             dy01             = _fjsp_sub_v2r8(iy0,jy1);
692             dz01             = _fjsp_sub_v2r8(iz0,jz1);
693             dx02             = _fjsp_sub_v2r8(ix0,jx2);
694             dy02             = _fjsp_sub_v2r8(iy0,jy2);
695             dz02             = _fjsp_sub_v2r8(iz0,jz2);
696             dx10             = _fjsp_sub_v2r8(ix1,jx0);
697             dy10             = _fjsp_sub_v2r8(iy1,jy0);
698             dz10             = _fjsp_sub_v2r8(iz1,jz0);
699             dx11             = _fjsp_sub_v2r8(ix1,jx1);
700             dy11             = _fjsp_sub_v2r8(iy1,jy1);
701             dz11             = _fjsp_sub_v2r8(iz1,jz1);
702             dx12             = _fjsp_sub_v2r8(ix1,jx2);
703             dy12             = _fjsp_sub_v2r8(iy1,jy2);
704             dz12             = _fjsp_sub_v2r8(iz1,jz2);
705             dx20             = _fjsp_sub_v2r8(ix2,jx0);
706             dy20             = _fjsp_sub_v2r8(iy2,jy0);
707             dz20             = _fjsp_sub_v2r8(iz2,jz0);
708             dx21             = _fjsp_sub_v2r8(ix2,jx1);
709             dy21             = _fjsp_sub_v2r8(iy2,jy1);
710             dz21             = _fjsp_sub_v2r8(iz2,jz1);
711             dx22             = _fjsp_sub_v2r8(ix2,jx2);
712             dy22             = _fjsp_sub_v2r8(iy2,jy2);
713             dz22             = _fjsp_sub_v2r8(iz2,jz2);
714
715             /* Calculate squared distance and things based on it */
716             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
717             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
718             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
719             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
720             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
721             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
722             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
723             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
724             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
725
726             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
727             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
728             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
729             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
730             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
731             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
732             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
733             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
734             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
735
736             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
737
738             fjx0             = _fjsp_setzero_v2r8();
739             fjy0             = _fjsp_setzero_v2r8();
740             fjz0             = _fjsp_setzero_v2r8();
741             fjx1             = _fjsp_setzero_v2r8();
742             fjy1             = _fjsp_setzero_v2r8();
743             fjz1             = _fjsp_setzero_v2r8();
744             fjx2             = _fjsp_setzero_v2r8();
745             fjy2             = _fjsp_setzero_v2r8();
746             fjz2             = _fjsp_setzero_v2r8();
747
748             /**************************
749              * CALCULATE INTERACTIONS *
750              **************************/
751
752             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
753
754             /* Calculate table index by multiplying r with table scale and truncate to integer */
755             rt               = _fjsp_mul_v2r8(r00,vftabscale);
756             itab_tmp         = _fjsp_dtox_v2r8(rt);
757             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
758             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
759             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
760
761             vfconv.i[0]     *= 4;
762             vfconv.i[1]     *= 4;
763
764             /* CUBIC SPLINE TABLE ELECTROSTATICS */
765             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
766             F                = _fjsp_setzero_v2r8();
767             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
768             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
769             H                = _fjsp_setzero_v2r8();
770             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
771             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
772             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
773             velec            = _fjsp_mul_v2r8(qq00,VV);
774             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
775             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
776
777             /* LENNARD-JONES DISPERSION/REPULSION */
778
779             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
780             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
781             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
782             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
783             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
784
785             /* Update potential sum for this i atom from the interaction with this j atom. */
786             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
787             velecsum         = _fjsp_add_v2r8(velecsum,velec);
788             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
789             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
790
791             fscal            = _fjsp_add_v2r8(felec,fvdw);
792
793             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
794
795             /* Update vectorial force */
796             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
797             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
798             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
799             
800             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
801             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
802             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
803
804             /**************************
805              * CALCULATE INTERACTIONS *
806              **************************/
807
808             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
809
810             /* Calculate table index by multiplying r with table scale and truncate to integer */
811             rt               = _fjsp_mul_v2r8(r01,vftabscale);
812             itab_tmp         = _fjsp_dtox_v2r8(rt);
813             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
814             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
815             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
816
817             vfconv.i[0]     *= 4;
818             vfconv.i[1]     *= 4;
819
820             /* CUBIC SPLINE TABLE ELECTROSTATICS */
821             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
822             F                = _fjsp_setzero_v2r8();
823             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
824             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
825             H                = _fjsp_setzero_v2r8();
826             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
827             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
828             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
829             velec            = _fjsp_mul_v2r8(qq01,VV);
830             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
831             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
832
833             /* Update potential sum for this i atom from the interaction with this j atom. */
834             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
835             velecsum         = _fjsp_add_v2r8(velecsum,velec);
836
837             fscal            = felec;
838
839             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
840
841             /* Update vectorial force */
842             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
843             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
844             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
845             
846             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
847             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
848             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
849
850             /**************************
851              * CALCULATE INTERACTIONS *
852              **************************/
853
854             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
855
856             /* Calculate table index by multiplying r with table scale and truncate to integer */
857             rt               = _fjsp_mul_v2r8(r02,vftabscale);
858             itab_tmp         = _fjsp_dtox_v2r8(rt);
859             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
860             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
861             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
862
863             vfconv.i[0]     *= 4;
864             vfconv.i[1]     *= 4;
865
866             /* CUBIC SPLINE TABLE ELECTROSTATICS */
867             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
868             F                = _fjsp_setzero_v2r8();
869             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
870             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
871             H                = _fjsp_setzero_v2r8();
872             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
873             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
874             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
875             velec            = _fjsp_mul_v2r8(qq02,VV);
876             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
877             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
878
879             /* Update potential sum for this i atom from the interaction with this j atom. */
880             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
881             velecsum         = _fjsp_add_v2r8(velecsum,velec);
882
883             fscal            = felec;
884
885             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
886
887             /* Update vectorial force */
888             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
889             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
890             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
891             
892             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
893             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
894             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
895
896             /**************************
897              * CALCULATE INTERACTIONS *
898              **************************/
899
900             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
901
902             /* Calculate table index by multiplying r with table scale and truncate to integer */
903             rt               = _fjsp_mul_v2r8(r10,vftabscale);
904             itab_tmp         = _fjsp_dtox_v2r8(rt);
905             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
906             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
907             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
908
909             vfconv.i[0]     *= 4;
910             vfconv.i[1]     *= 4;
911
912             /* CUBIC SPLINE TABLE ELECTROSTATICS */
913             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
914             F                = _fjsp_setzero_v2r8();
915             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
916             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
917             H                = _fjsp_setzero_v2r8();
918             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
919             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
920             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
921             velec            = _fjsp_mul_v2r8(qq10,VV);
922             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
923             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
924
925             /* Update potential sum for this i atom from the interaction with this j atom. */
926             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
927             velecsum         = _fjsp_add_v2r8(velecsum,velec);
928
929             fscal            = felec;
930
931             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
932
933             /* Update vectorial force */
934             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
935             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
936             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
937             
938             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
939             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
940             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
941
942             /**************************
943              * CALCULATE INTERACTIONS *
944              **************************/
945
946             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
947
948             /* Calculate table index by multiplying r with table scale and truncate to integer */
949             rt               = _fjsp_mul_v2r8(r11,vftabscale);
950             itab_tmp         = _fjsp_dtox_v2r8(rt);
951             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
952             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
953             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
954
955             vfconv.i[0]     *= 4;
956             vfconv.i[1]     *= 4;
957
958             /* CUBIC SPLINE TABLE ELECTROSTATICS */
959             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
960             F                = _fjsp_setzero_v2r8();
961             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
962             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
963             H                = _fjsp_setzero_v2r8();
964             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
965             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
966             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
967             velec            = _fjsp_mul_v2r8(qq11,VV);
968             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
969             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
970
971             /* Update potential sum for this i atom from the interaction with this j atom. */
972             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
973             velecsum         = _fjsp_add_v2r8(velecsum,velec);
974
975             fscal            = felec;
976
977             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
978
979             /* Update vectorial force */
980             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
981             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
982             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
983             
984             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
985             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
986             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
987
988             /**************************
989              * CALCULATE INTERACTIONS *
990              **************************/
991
992             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
993
994             /* Calculate table index by multiplying r with table scale and truncate to integer */
995             rt               = _fjsp_mul_v2r8(r12,vftabscale);
996             itab_tmp         = _fjsp_dtox_v2r8(rt);
997             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
998             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
999             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1000
1001             vfconv.i[0]     *= 4;
1002             vfconv.i[1]     *= 4;
1003
1004             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1005             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1006             F                = _fjsp_setzero_v2r8();
1007             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1008             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1009             H                = _fjsp_setzero_v2r8();
1010             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1011             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1012             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1013             velec            = _fjsp_mul_v2r8(qq12,VV);
1014             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1015             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1016
1017             /* Update potential sum for this i atom from the interaction with this j atom. */
1018             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1019             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1020
1021             fscal            = felec;
1022
1023             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1024
1025             /* Update vectorial force */
1026             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1027             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1028             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1029             
1030             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1031             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1032             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1033
1034             /**************************
1035              * CALCULATE INTERACTIONS *
1036              **************************/
1037
1038             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1039
1040             /* Calculate table index by multiplying r with table scale and truncate to integer */
1041             rt               = _fjsp_mul_v2r8(r20,vftabscale);
1042             itab_tmp         = _fjsp_dtox_v2r8(rt);
1043             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1044             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1045             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1046
1047             vfconv.i[0]     *= 4;
1048             vfconv.i[1]     *= 4;
1049
1050             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1051             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1052             F                = _fjsp_setzero_v2r8();
1053             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1054             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1055             H                = _fjsp_setzero_v2r8();
1056             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1057             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1058             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1059             velec            = _fjsp_mul_v2r8(qq20,VV);
1060             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1061             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1062
1063             /* Update potential sum for this i atom from the interaction with this j atom. */
1064             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1065             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1066
1067             fscal            = felec;
1068
1069             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1070
1071             /* Update vectorial force */
1072             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1073             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1074             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1075             
1076             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1077             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1078             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1079
1080             /**************************
1081              * CALCULATE INTERACTIONS *
1082              **************************/
1083
1084             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1085
1086             /* Calculate table index by multiplying r with table scale and truncate to integer */
1087             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1088             itab_tmp         = _fjsp_dtox_v2r8(rt);
1089             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1090             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1091             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1092
1093             vfconv.i[0]     *= 4;
1094             vfconv.i[1]     *= 4;
1095
1096             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1097             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1098             F                = _fjsp_setzero_v2r8();
1099             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1100             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1101             H                = _fjsp_setzero_v2r8();
1102             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1103             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1104             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1105             velec            = _fjsp_mul_v2r8(qq21,VV);
1106             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1107             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1108
1109             /* Update potential sum for this i atom from the interaction with this j atom. */
1110             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1111             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1112
1113             fscal            = felec;
1114
1115             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1116
1117             /* Update vectorial force */
1118             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1119             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1120             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1121             
1122             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1123             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1124             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1125
1126             /**************************
1127              * CALCULATE INTERACTIONS *
1128              **************************/
1129
1130             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1131
1132             /* Calculate table index by multiplying r with table scale and truncate to integer */
1133             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1134             itab_tmp         = _fjsp_dtox_v2r8(rt);
1135             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1136             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1137             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1138
1139             vfconv.i[0]     *= 4;
1140             vfconv.i[1]     *= 4;
1141
1142             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1143             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1144             F                = _fjsp_setzero_v2r8();
1145             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1146             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1147             H                = _fjsp_setzero_v2r8();
1148             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1149             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1150             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1151             velec            = _fjsp_mul_v2r8(qq22,VV);
1152             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1153             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1154
1155             /* Update potential sum for this i atom from the interaction with this j atom. */
1156             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1157             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1158
1159             fscal            = felec;
1160
1161             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1162
1163             /* Update vectorial force */
1164             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1165             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1166             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1167             
1168             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1169             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1170             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1171
1172             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1173
1174             /* Inner loop uses 427 flops */
1175         }
1176
1177         /* End of innermost loop */
1178
1179         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1180                                               f+i_coord_offset,fshift+i_shift_offset);
1181
1182         ggid                        = gid[iidx];
1183         /* Update potential energies */
1184         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1185         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1186
1187         /* Increment number of inner iterations */
1188         inneriter                  += j_index_end - j_index_start;
1189
1190         /* Outer loop uses 20 flops */
1191     }
1192
1193     /* Increment number of outer iterations */
1194     outeriter        += nri;
1195
1196     /* Update outer/inner flops */
1197
1198     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*427);
1199 }
1200 /*
1201  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1202  * Electrostatics interaction: CubicSplineTable
1203  * VdW interaction:            LennardJones
1204  * Geometry:                   Water3-Water3
1205  * Calculate force/pot:        Force
1206  */
1207 void
1208 nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1209                     (t_nblist                    * gmx_restrict       nlist,
1210                      rvec                        * gmx_restrict          xx,
1211                      rvec                        * gmx_restrict          ff,
1212                      t_forcerec                  * gmx_restrict          fr,
1213                      t_mdatoms                   * gmx_restrict     mdatoms,
1214                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1215                      t_nrnb                      * gmx_restrict        nrnb)
1216 {
1217     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1218      * just 0 for non-waters.
1219      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1220      * jnr indices corresponding to data put in the two positions in the SIMD register.
1221      */
1222     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1223     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1224     int              jnrA,jnrB;
1225     int              j_coord_offsetA,j_coord_offsetB;
1226     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1227     real             rcutoff_scalar;
1228     real             *shiftvec,*fshift,*x,*f;
1229     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1230     int              vdwioffset0;
1231     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1232     int              vdwioffset1;
1233     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1234     int              vdwioffset2;
1235     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1236     int              vdwjidx0A,vdwjidx0B;
1237     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1238     int              vdwjidx1A,vdwjidx1B;
1239     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1240     int              vdwjidx2A,vdwjidx2B;
1241     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1242     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1243     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1244     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1245     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1246     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1247     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1248     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1249     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1250     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1251     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1252     real             *charge;
1253     int              nvdwtype;
1254     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1255     int              *vdwtype;
1256     real             *vdwparam;
1257     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1258     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1259     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1260     real             *vftab;
1261     _fjsp_v2r8       itab_tmp;
1262     _fjsp_v2r8       dummy_mask,cutoff_mask;
1263     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1264     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1265     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1266
1267     x                = xx[0];
1268     f                = ff[0];
1269
1270     nri              = nlist->nri;
1271     iinr             = nlist->iinr;
1272     jindex           = nlist->jindex;
1273     jjnr             = nlist->jjnr;
1274     shiftidx         = nlist->shift;
1275     gid              = nlist->gid;
1276     shiftvec         = fr->shift_vec[0];
1277     fshift           = fr->fshift[0];
1278     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1279     charge           = mdatoms->chargeA;
1280     nvdwtype         = fr->ntype;
1281     vdwparam         = fr->nbfp;
1282     vdwtype          = mdatoms->typeA;
1283
1284     vftab            = kernel_data->table_elec->data;
1285     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
1286
1287     /* Setup water-specific parameters */
1288     inr              = nlist->iinr[0];
1289     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1290     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1291     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1292     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1293
1294     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1295     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1296     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1297     vdwjidx0A        = 2*vdwtype[inr+0];
1298     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1299     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1300     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1301     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1302     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1303     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1304     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1305     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1306     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1307     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1308     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1309
1310     /* Avoid stupid compiler warnings */
1311     jnrA = jnrB = 0;
1312     j_coord_offsetA = 0;
1313     j_coord_offsetB = 0;
1314
1315     outeriter        = 0;
1316     inneriter        = 0;
1317
1318     /* Start outer loop over neighborlists */
1319     for(iidx=0; iidx<nri; iidx++)
1320     {
1321         /* Load shift vector for this list */
1322         i_shift_offset   = DIM*shiftidx[iidx];
1323
1324         /* Load limits for loop over neighbors */
1325         j_index_start    = jindex[iidx];
1326         j_index_end      = jindex[iidx+1];
1327
1328         /* Get outer coordinate index */
1329         inr              = iinr[iidx];
1330         i_coord_offset   = DIM*inr;
1331
1332         /* Load i particle coords and add shift vector */
1333         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1334                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1335
1336         fix0             = _fjsp_setzero_v2r8();
1337         fiy0             = _fjsp_setzero_v2r8();
1338         fiz0             = _fjsp_setzero_v2r8();
1339         fix1             = _fjsp_setzero_v2r8();
1340         fiy1             = _fjsp_setzero_v2r8();
1341         fiz1             = _fjsp_setzero_v2r8();
1342         fix2             = _fjsp_setzero_v2r8();
1343         fiy2             = _fjsp_setzero_v2r8();
1344         fiz2             = _fjsp_setzero_v2r8();
1345
1346         /* Start inner kernel loop */
1347         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1348         {
1349
1350             /* Get j neighbor index, and coordinate index */
1351             jnrA             = jjnr[jidx];
1352             jnrB             = jjnr[jidx+1];
1353             j_coord_offsetA  = DIM*jnrA;
1354             j_coord_offsetB  = DIM*jnrB;
1355
1356             /* load j atom coordinates */
1357             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1358                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1359
1360             /* Calculate displacement vector */
1361             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1362             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1363             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1364             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1365             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1366             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1367             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1368             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1369             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1370             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1371             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1372             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1373             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1374             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1375             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1376             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1377             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1378             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1379             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1380             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1381             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1382             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1383             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1384             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1385             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1386             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1387             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1388
1389             /* Calculate squared distance and things based on it */
1390             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1391             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1392             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1393             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1394             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1395             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1396             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1397             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1398             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1399
1400             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1401             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1402             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1403             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1404             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1405             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1406             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1407             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1408             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1409
1410             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1411
1412             fjx0             = _fjsp_setzero_v2r8();
1413             fjy0             = _fjsp_setzero_v2r8();
1414             fjz0             = _fjsp_setzero_v2r8();
1415             fjx1             = _fjsp_setzero_v2r8();
1416             fjy1             = _fjsp_setzero_v2r8();
1417             fjz1             = _fjsp_setzero_v2r8();
1418             fjx2             = _fjsp_setzero_v2r8();
1419             fjy2             = _fjsp_setzero_v2r8();
1420             fjz2             = _fjsp_setzero_v2r8();
1421
1422             /**************************
1423              * CALCULATE INTERACTIONS *
1424              **************************/
1425
1426             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1427
1428             /* Calculate table index by multiplying r with table scale and truncate to integer */
1429             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1430             itab_tmp         = _fjsp_dtox_v2r8(rt);
1431             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1432             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1433             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1434
1435             vfconv.i[0]     *= 4;
1436             vfconv.i[1]     *= 4;
1437
1438             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1439             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1440             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1441             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1442             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1443             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1444             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1445             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1446             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1447             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1448
1449             /* LENNARD-JONES DISPERSION/REPULSION */
1450
1451             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1452             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1453
1454             fscal            = _fjsp_add_v2r8(felec,fvdw);
1455
1456             /* Update vectorial force */
1457             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1458             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1459             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1460             
1461             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1462             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1463             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1464
1465             /**************************
1466              * CALCULATE INTERACTIONS *
1467              **************************/
1468
1469             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1470
1471             /* Calculate table index by multiplying r with table scale and truncate to integer */
1472             rt               = _fjsp_mul_v2r8(r01,vftabscale);
1473             itab_tmp         = _fjsp_dtox_v2r8(rt);
1474             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1475             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1476             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1477
1478             vfconv.i[0]     *= 4;
1479             vfconv.i[1]     *= 4;
1480
1481             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1482             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1483             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1484             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1485             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1486             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1487             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1488             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1489             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1490             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1491
1492             fscal            = felec;
1493
1494             /* Update vectorial force */
1495             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1496             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1497             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1498             
1499             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1500             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1501             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1502
1503             /**************************
1504              * CALCULATE INTERACTIONS *
1505              **************************/
1506
1507             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1508
1509             /* Calculate table index by multiplying r with table scale and truncate to integer */
1510             rt               = _fjsp_mul_v2r8(r02,vftabscale);
1511             itab_tmp         = _fjsp_dtox_v2r8(rt);
1512             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1513             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1514             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1515
1516             vfconv.i[0]     *= 4;
1517             vfconv.i[1]     *= 4;
1518
1519             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1520             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1521             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1522             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1523             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1524             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1525             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1526             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1527             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1528             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1529
1530             fscal            = felec;
1531
1532             /* Update vectorial force */
1533             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1534             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1535             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1536             
1537             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1538             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1539             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1540
1541             /**************************
1542              * CALCULATE INTERACTIONS *
1543              **************************/
1544
1545             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1546
1547             /* Calculate table index by multiplying r with table scale and truncate to integer */
1548             rt               = _fjsp_mul_v2r8(r10,vftabscale);
1549             itab_tmp         = _fjsp_dtox_v2r8(rt);
1550             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1551             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1552             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1553
1554             vfconv.i[0]     *= 4;
1555             vfconv.i[1]     *= 4;
1556
1557             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1558             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1559             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1560             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1561             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1562             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1563             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1564             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1565             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1566             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1567
1568             fscal            = felec;
1569
1570             /* Update vectorial force */
1571             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1572             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1573             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1574             
1575             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1576             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1577             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1578
1579             /**************************
1580              * CALCULATE INTERACTIONS *
1581              **************************/
1582
1583             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1584
1585             /* Calculate table index by multiplying r with table scale and truncate to integer */
1586             rt               = _fjsp_mul_v2r8(r11,vftabscale);
1587             itab_tmp         = _fjsp_dtox_v2r8(rt);
1588             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1589             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1590             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1591
1592             vfconv.i[0]     *= 4;
1593             vfconv.i[1]     *= 4;
1594
1595             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1596             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1597             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1598             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1599             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1600             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1601             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1602             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1603             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1604             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1605
1606             fscal            = felec;
1607
1608             /* Update vectorial force */
1609             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1610             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1611             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1612             
1613             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1614             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1615             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1616
1617             /**************************
1618              * CALCULATE INTERACTIONS *
1619              **************************/
1620
1621             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1622
1623             /* Calculate table index by multiplying r with table scale and truncate to integer */
1624             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1625             itab_tmp         = _fjsp_dtox_v2r8(rt);
1626             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1627             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1628             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1629
1630             vfconv.i[0]     *= 4;
1631             vfconv.i[1]     *= 4;
1632
1633             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1634             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1635             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1636             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1637             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1638             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1639             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1640             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1641             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1642             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1643
1644             fscal            = felec;
1645
1646             /* Update vectorial force */
1647             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1648             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1649             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1650             
1651             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1652             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1653             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1654
1655             /**************************
1656              * CALCULATE INTERACTIONS *
1657              **************************/
1658
1659             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1660
1661             /* Calculate table index by multiplying r with table scale and truncate to integer */
1662             rt               = _fjsp_mul_v2r8(r20,vftabscale);
1663             itab_tmp         = _fjsp_dtox_v2r8(rt);
1664             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1665             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1666             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1667
1668             vfconv.i[0]     *= 4;
1669             vfconv.i[1]     *= 4;
1670
1671             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1672             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1673             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1674             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1675             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1676             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1677             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1678             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1679             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1680             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1681
1682             fscal            = felec;
1683
1684             /* Update vectorial force */
1685             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1686             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1687             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1688             
1689             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1690             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1691             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1692
1693             /**************************
1694              * CALCULATE INTERACTIONS *
1695              **************************/
1696
1697             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1698
1699             /* Calculate table index by multiplying r with table scale and truncate to integer */
1700             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1701             itab_tmp         = _fjsp_dtox_v2r8(rt);
1702             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1703             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1704             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1705
1706             vfconv.i[0]     *= 4;
1707             vfconv.i[1]     *= 4;
1708
1709             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1710             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1711             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1712             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1713             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1714             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1715             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1716             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1717             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1718             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1719
1720             fscal            = felec;
1721
1722             /* Update vectorial force */
1723             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1724             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1725             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1726             
1727             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1728             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1729             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1730
1731             /**************************
1732              * CALCULATE INTERACTIONS *
1733              **************************/
1734
1735             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1736
1737             /* Calculate table index by multiplying r with table scale and truncate to integer */
1738             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1739             itab_tmp         = _fjsp_dtox_v2r8(rt);
1740             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1741             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1742             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1743
1744             vfconv.i[0]     *= 4;
1745             vfconv.i[1]     *= 4;
1746
1747             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1748             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1749             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1750             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1751             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1752             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1753             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1754             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1755             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1756             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1757
1758             fscal            = felec;
1759
1760             /* Update vectorial force */
1761             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1762             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1763             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1764             
1765             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1766             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1767             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1768
1769             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1770
1771             /* Inner loop uses 386 flops */
1772         }
1773
1774         if(jidx<j_index_end)
1775         {
1776
1777             jnrA             = jjnr[jidx];
1778             j_coord_offsetA  = DIM*jnrA;
1779
1780             /* load j atom coordinates */
1781             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1782                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1783
1784             /* Calculate displacement vector */
1785             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1786             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1787             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1788             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1789             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1790             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1791             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1792             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1793             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1794             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1795             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1796             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1797             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1798             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1799             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1800             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1801             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1802             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1803             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1804             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1805             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1806             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1807             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1808             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1809             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1810             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1811             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1812
1813             /* Calculate squared distance and things based on it */
1814             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1815             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1816             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1817             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1818             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1819             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1820             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1821             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1822             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1823
1824             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1825             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1826             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1827             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1828             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1829             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1830             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1831             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1832             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1833
1834             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1835
1836             fjx0             = _fjsp_setzero_v2r8();
1837             fjy0             = _fjsp_setzero_v2r8();
1838             fjz0             = _fjsp_setzero_v2r8();
1839             fjx1             = _fjsp_setzero_v2r8();
1840             fjy1             = _fjsp_setzero_v2r8();
1841             fjz1             = _fjsp_setzero_v2r8();
1842             fjx2             = _fjsp_setzero_v2r8();
1843             fjy2             = _fjsp_setzero_v2r8();
1844             fjz2             = _fjsp_setzero_v2r8();
1845
1846             /**************************
1847              * CALCULATE INTERACTIONS *
1848              **************************/
1849
1850             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1851
1852             /* Calculate table index by multiplying r with table scale and truncate to integer */
1853             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1854             itab_tmp         = _fjsp_dtox_v2r8(rt);
1855             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1856             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1857             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1858
1859             vfconv.i[0]     *= 4;
1860             vfconv.i[1]     *= 4;
1861
1862             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1863             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1864             F                = _fjsp_setzero_v2r8();
1865             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1866             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1867             H                = _fjsp_setzero_v2r8();
1868             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1869             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1870             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1871             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1872
1873             /* LENNARD-JONES DISPERSION/REPULSION */
1874
1875             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1876             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1877
1878             fscal            = _fjsp_add_v2r8(felec,fvdw);
1879
1880             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1881
1882             /* Update vectorial force */
1883             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1884             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1885             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1886             
1887             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1888             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1889             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1890
1891             /**************************
1892              * CALCULATE INTERACTIONS *
1893              **************************/
1894
1895             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1896
1897             /* Calculate table index by multiplying r with table scale and truncate to integer */
1898             rt               = _fjsp_mul_v2r8(r01,vftabscale);
1899             itab_tmp         = _fjsp_dtox_v2r8(rt);
1900             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1901             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1902             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1903
1904             vfconv.i[0]     *= 4;
1905             vfconv.i[1]     *= 4;
1906
1907             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1908             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1909             F                = _fjsp_setzero_v2r8();
1910             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1911             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1912             H                = _fjsp_setzero_v2r8();
1913             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1914             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1915             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1916             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1917
1918             fscal            = felec;
1919
1920             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1921
1922             /* Update vectorial force */
1923             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1924             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1925             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1926             
1927             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1928             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1929             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1930
1931             /**************************
1932              * CALCULATE INTERACTIONS *
1933              **************************/
1934
1935             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1936
1937             /* Calculate table index by multiplying r with table scale and truncate to integer */
1938             rt               = _fjsp_mul_v2r8(r02,vftabscale);
1939             itab_tmp         = _fjsp_dtox_v2r8(rt);
1940             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1941             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1942             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1943
1944             vfconv.i[0]     *= 4;
1945             vfconv.i[1]     *= 4;
1946
1947             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1948             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1949             F                = _fjsp_setzero_v2r8();
1950             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1951             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1952             H                = _fjsp_setzero_v2r8();
1953             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1954             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1955             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1956             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1957
1958             fscal            = felec;
1959
1960             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1961
1962             /* Update vectorial force */
1963             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1964             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1965             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1966             
1967             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1968             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1969             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1970
1971             /**************************
1972              * CALCULATE INTERACTIONS *
1973              **************************/
1974
1975             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1976
1977             /* Calculate table index by multiplying r with table scale and truncate to integer */
1978             rt               = _fjsp_mul_v2r8(r10,vftabscale);
1979             itab_tmp         = _fjsp_dtox_v2r8(rt);
1980             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1981             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1982             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1983
1984             vfconv.i[0]     *= 4;
1985             vfconv.i[1]     *= 4;
1986
1987             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1988             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1989             F                = _fjsp_setzero_v2r8();
1990             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1991             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1992             H                = _fjsp_setzero_v2r8();
1993             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1994             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1995             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1996             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1997
1998             fscal            = felec;
1999
2000             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2001
2002             /* Update vectorial force */
2003             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
2004             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2005             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2006             
2007             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2008             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2009             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2010
2011             /**************************
2012              * CALCULATE INTERACTIONS *
2013              **************************/
2014
2015             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
2016
2017             /* Calculate table index by multiplying r with table scale and truncate to integer */
2018             rt               = _fjsp_mul_v2r8(r11,vftabscale);
2019             itab_tmp         = _fjsp_dtox_v2r8(rt);
2020             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2021             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2022             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2023
2024             vfconv.i[0]     *= 4;
2025             vfconv.i[1]     *= 4;
2026
2027             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2028             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2029             F                = _fjsp_setzero_v2r8();
2030             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2031             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2032             H                = _fjsp_setzero_v2r8();
2033             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2034             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2035             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2036             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2037
2038             fscal            = felec;
2039
2040             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2041
2042             /* Update vectorial force */
2043             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
2044             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2045             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2046             
2047             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2048             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2049             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2050
2051             /**************************
2052              * CALCULATE INTERACTIONS *
2053              **************************/
2054
2055             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2056
2057             /* Calculate table index by multiplying r with table scale and truncate to integer */
2058             rt               = _fjsp_mul_v2r8(r12,vftabscale);
2059             itab_tmp         = _fjsp_dtox_v2r8(rt);
2060             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2061             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2062             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2063
2064             vfconv.i[0]     *= 4;
2065             vfconv.i[1]     *= 4;
2066
2067             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2068             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2069             F                = _fjsp_setzero_v2r8();
2070             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2071             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2072             H                = _fjsp_setzero_v2r8();
2073             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2074             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2075             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2076             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2077
2078             fscal            = felec;
2079
2080             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2081
2082             /* Update vectorial force */
2083             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2084             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2085             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2086             
2087             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2088             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2089             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2090
2091             /**************************
2092              * CALCULATE INTERACTIONS *
2093              **************************/
2094
2095             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
2096
2097             /* Calculate table index by multiplying r with table scale and truncate to integer */
2098             rt               = _fjsp_mul_v2r8(r20,vftabscale);
2099             itab_tmp         = _fjsp_dtox_v2r8(rt);
2100             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2101             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2102             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2103
2104             vfconv.i[0]     *= 4;
2105             vfconv.i[1]     *= 4;
2106
2107             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2108             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2109             F                = _fjsp_setzero_v2r8();
2110             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2111             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2112             H                = _fjsp_setzero_v2r8();
2113             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2114             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2115             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2116             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2117
2118             fscal            = felec;
2119
2120             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2121
2122             /* Update vectorial force */
2123             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
2124             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2125             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2126             
2127             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2128             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2129             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2130
2131             /**************************
2132              * CALCULATE INTERACTIONS *
2133              **************************/
2134
2135             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2136
2137             /* Calculate table index by multiplying r with table scale and truncate to integer */
2138             rt               = _fjsp_mul_v2r8(r21,vftabscale);
2139             itab_tmp         = _fjsp_dtox_v2r8(rt);
2140             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2141             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2142             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2143
2144             vfconv.i[0]     *= 4;
2145             vfconv.i[1]     *= 4;
2146
2147             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2148             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2149             F                = _fjsp_setzero_v2r8();
2150             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2151             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2152             H                = _fjsp_setzero_v2r8();
2153             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2154             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2155             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2156             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2157
2158             fscal            = felec;
2159
2160             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2161
2162             /* Update vectorial force */
2163             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2164             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2165             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2166             
2167             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2168             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2169             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2170
2171             /**************************
2172              * CALCULATE INTERACTIONS *
2173              **************************/
2174
2175             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2176
2177             /* Calculate table index by multiplying r with table scale and truncate to integer */
2178             rt               = _fjsp_mul_v2r8(r22,vftabscale);
2179             itab_tmp         = _fjsp_dtox_v2r8(rt);
2180             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2181             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2182             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2183
2184             vfconv.i[0]     *= 4;
2185             vfconv.i[1]     *= 4;
2186
2187             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2188             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2189             F                = _fjsp_setzero_v2r8();
2190             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2191             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2192             H                = _fjsp_setzero_v2r8();
2193             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2194             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2195             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2196             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2197
2198             fscal            = felec;
2199
2200             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2201
2202             /* Update vectorial force */
2203             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2204             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2205             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2206             
2207             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2208             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2209             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2210
2211             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2212
2213             /* Inner loop uses 386 flops */
2214         }
2215
2216         /* End of innermost loop */
2217
2218         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2219                                               f+i_coord_offset,fshift+i_shift_offset);
2220
2221         /* Increment number of inner iterations */
2222         inneriter                  += j_index_end - j_index_start;
2223
2224         /* Outer loop uses 18 flops */
2225     }
2226
2227     /* Increment number of outer iterations */
2228     outeriter        += nri;
2229
2230     /* Update outer/inner flops */
2231
2232     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*386);
2233 }