Remove no-inline-max-size and suppress remark
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/legacyheaders/vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: CubicSplineTable
54  * VdW interaction:            CubicSplineTable
55  * Geometry:                   Water3-Water3
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     int              nvdwtype;
105     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106     int              *vdwtype;
107     real             *vdwparam;
108     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
109     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
111     real             *vftab;
112     _fjsp_v2r8       itab_tmp;
113     _fjsp_v2r8       dummy_mask,cutoff_mask;
114     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
115     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
116     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
117
118     x                = xx[0];
119     f                = ff[0];
120
121     nri              = nlist->nri;
122     iinr             = nlist->iinr;
123     jindex           = nlist->jindex;
124     jjnr             = nlist->jjnr;
125     shiftidx         = nlist->shift;
126     gid              = nlist->gid;
127     shiftvec         = fr->shift_vec[0];
128     fshift           = fr->fshift[0];
129     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
130     charge           = mdatoms->chargeA;
131     nvdwtype         = fr->ntype;
132     vdwparam         = fr->nbfp;
133     vdwtype          = mdatoms->typeA;
134
135     vftab            = kernel_data->table_elec_vdw->data;
136     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
137
138     /* Setup water-specific parameters */
139     inr              = nlist->iinr[0];
140     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
141     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
142     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
143     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
144
145     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
146     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
147     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
148     vdwjidx0A        = 2*vdwtype[inr+0];
149     qq00             = _fjsp_mul_v2r8(iq0,jq0);
150     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
151     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
152     qq01             = _fjsp_mul_v2r8(iq0,jq1);
153     qq02             = _fjsp_mul_v2r8(iq0,jq2);
154     qq10             = _fjsp_mul_v2r8(iq1,jq0);
155     qq11             = _fjsp_mul_v2r8(iq1,jq1);
156     qq12             = _fjsp_mul_v2r8(iq1,jq2);
157     qq20             = _fjsp_mul_v2r8(iq2,jq0);
158     qq21             = _fjsp_mul_v2r8(iq2,jq1);
159     qq22             = _fjsp_mul_v2r8(iq2,jq2);
160
161     /* Avoid stupid compiler warnings */
162     jnrA = jnrB = 0;
163     j_coord_offsetA = 0;
164     j_coord_offsetB = 0;
165
166     outeriter        = 0;
167     inneriter        = 0;
168
169     /* Start outer loop over neighborlists */
170     for(iidx=0; iidx<nri; iidx++)
171     {
172         /* Load shift vector for this list */
173         i_shift_offset   = DIM*shiftidx[iidx];
174
175         /* Load limits for loop over neighbors */
176         j_index_start    = jindex[iidx];
177         j_index_end      = jindex[iidx+1];
178
179         /* Get outer coordinate index */
180         inr              = iinr[iidx];
181         i_coord_offset   = DIM*inr;
182
183         /* Load i particle coords and add shift vector */
184         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
185                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
186
187         fix0             = _fjsp_setzero_v2r8();
188         fiy0             = _fjsp_setzero_v2r8();
189         fiz0             = _fjsp_setzero_v2r8();
190         fix1             = _fjsp_setzero_v2r8();
191         fiy1             = _fjsp_setzero_v2r8();
192         fiz1             = _fjsp_setzero_v2r8();
193         fix2             = _fjsp_setzero_v2r8();
194         fiy2             = _fjsp_setzero_v2r8();
195         fiz2             = _fjsp_setzero_v2r8();
196
197         /* Reset potential sums */
198         velecsum         = _fjsp_setzero_v2r8();
199         vvdwsum          = _fjsp_setzero_v2r8();
200
201         /* Start inner kernel loop */
202         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
203         {
204
205             /* Get j neighbor index, and coordinate index */
206             jnrA             = jjnr[jidx];
207             jnrB             = jjnr[jidx+1];
208             j_coord_offsetA  = DIM*jnrA;
209             j_coord_offsetB  = DIM*jnrB;
210
211             /* load j atom coordinates */
212             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
213                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
214
215             /* Calculate displacement vector */
216             dx00             = _fjsp_sub_v2r8(ix0,jx0);
217             dy00             = _fjsp_sub_v2r8(iy0,jy0);
218             dz00             = _fjsp_sub_v2r8(iz0,jz0);
219             dx01             = _fjsp_sub_v2r8(ix0,jx1);
220             dy01             = _fjsp_sub_v2r8(iy0,jy1);
221             dz01             = _fjsp_sub_v2r8(iz0,jz1);
222             dx02             = _fjsp_sub_v2r8(ix0,jx2);
223             dy02             = _fjsp_sub_v2r8(iy0,jy2);
224             dz02             = _fjsp_sub_v2r8(iz0,jz2);
225             dx10             = _fjsp_sub_v2r8(ix1,jx0);
226             dy10             = _fjsp_sub_v2r8(iy1,jy0);
227             dz10             = _fjsp_sub_v2r8(iz1,jz0);
228             dx11             = _fjsp_sub_v2r8(ix1,jx1);
229             dy11             = _fjsp_sub_v2r8(iy1,jy1);
230             dz11             = _fjsp_sub_v2r8(iz1,jz1);
231             dx12             = _fjsp_sub_v2r8(ix1,jx2);
232             dy12             = _fjsp_sub_v2r8(iy1,jy2);
233             dz12             = _fjsp_sub_v2r8(iz1,jz2);
234             dx20             = _fjsp_sub_v2r8(ix2,jx0);
235             dy20             = _fjsp_sub_v2r8(iy2,jy0);
236             dz20             = _fjsp_sub_v2r8(iz2,jz0);
237             dx21             = _fjsp_sub_v2r8(ix2,jx1);
238             dy21             = _fjsp_sub_v2r8(iy2,jy1);
239             dz21             = _fjsp_sub_v2r8(iz2,jz1);
240             dx22             = _fjsp_sub_v2r8(ix2,jx2);
241             dy22             = _fjsp_sub_v2r8(iy2,jy2);
242             dz22             = _fjsp_sub_v2r8(iz2,jz2);
243
244             /* Calculate squared distance and things based on it */
245             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
246             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
247             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
248             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
249             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
250             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
251             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
252             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
253             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
254
255             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
256             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
257             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
258             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
259             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
260             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
261             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
262             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
263             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
264
265             fjx0             = _fjsp_setzero_v2r8();
266             fjy0             = _fjsp_setzero_v2r8();
267             fjz0             = _fjsp_setzero_v2r8();
268             fjx1             = _fjsp_setzero_v2r8();
269             fjy1             = _fjsp_setzero_v2r8();
270             fjz1             = _fjsp_setzero_v2r8();
271             fjx2             = _fjsp_setzero_v2r8();
272             fjy2             = _fjsp_setzero_v2r8();
273             fjz2             = _fjsp_setzero_v2r8();
274
275             /**************************
276              * CALCULATE INTERACTIONS *
277              **************************/
278
279             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
280
281             /* Calculate table index by multiplying r with table scale and truncate to integer */
282             rt               = _fjsp_mul_v2r8(r00,vftabscale);
283             itab_tmp         = _fjsp_dtox_v2r8(rt);
284             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
285             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
286             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
287
288             vfconv.i[0]     *= 12;
289             vfconv.i[1]     *= 12;
290
291             /* CUBIC SPLINE TABLE ELECTROSTATICS */
292             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
293             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
294             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
295             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
296             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
297             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
298             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
299             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
300             velec            = _fjsp_mul_v2r8(qq00,VV);
301             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
302             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
303
304             /* CUBIC SPLINE TABLE DISPERSION */
305             vfconv.i[0]       += 4;
306             vfconv.i[1]       += 4;
307             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
308             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
309             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
310             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
311             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
312             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
313             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
314             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
315             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
316             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
317             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
318
319             /* CUBIC SPLINE TABLE REPULSION */
320             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
321             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
322             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
323             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
324             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
325             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
326             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
327             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
328             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
329             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
330             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
331             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
332             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
333
334             /* Update potential sum for this i atom from the interaction with this j atom. */
335             velecsum         = _fjsp_add_v2r8(velecsum,velec);
336             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
337
338             fscal            = _fjsp_add_v2r8(felec,fvdw);
339
340             /* Update vectorial force */
341             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
342             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
343             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
344             
345             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
346             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
347             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
348
349             /**************************
350              * CALCULATE INTERACTIONS *
351              **************************/
352
353             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
354
355             /* Calculate table index by multiplying r with table scale and truncate to integer */
356             rt               = _fjsp_mul_v2r8(r01,vftabscale);
357             itab_tmp         = _fjsp_dtox_v2r8(rt);
358             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
359             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
360             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
361
362             vfconv.i[0]     *= 12;
363             vfconv.i[1]     *= 12;
364
365             /* CUBIC SPLINE TABLE ELECTROSTATICS */
366             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
367             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
368             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
369             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
370             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
371             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
372             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
373             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
374             velec            = _fjsp_mul_v2r8(qq01,VV);
375             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
376             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
377
378             /* Update potential sum for this i atom from the interaction with this j atom. */
379             velecsum         = _fjsp_add_v2r8(velecsum,velec);
380
381             fscal            = felec;
382
383             /* Update vectorial force */
384             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
385             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
386             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
387             
388             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
389             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
390             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
391
392             /**************************
393              * CALCULATE INTERACTIONS *
394              **************************/
395
396             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
397
398             /* Calculate table index by multiplying r with table scale and truncate to integer */
399             rt               = _fjsp_mul_v2r8(r02,vftabscale);
400             itab_tmp         = _fjsp_dtox_v2r8(rt);
401             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
402             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
403             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
404
405             vfconv.i[0]     *= 12;
406             vfconv.i[1]     *= 12;
407
408             /* CUBIC SPLINE TABLE ELECTROSTATICS */
409             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
410             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
411             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
412             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
413             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
414             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
415             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
416             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
417             velec            = _fjsp_mul_v2r8(qq02,VV);
418             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
419             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
420
421             /* Update potential sum for this i atom from the interaction with this j atom. */
422             velecsum         = _fjsp_add_v2r8(velecsum,velec);
423
424             fscal            = felec;
425
426             /* Update vectorial force */
427             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
428             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
429             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
430             
431             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
432             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
433             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
434
435             /**************************
436              * CALCULATE INTERACTIONS *
437              **************************/
438
439             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
440
441             /* Calculate table index by multiplying r with table scale and truncate to integer */
442             rt               = _fjsp_mul_v2r8(r10,vftabscale);
443             itab_tmp         = _fjsp_dtox_v2r8(rt);
444             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
445             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
446             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
447
448             vfconv.i[0]     *= 12;
449             vfconv.i[1]     *= 12;
450
451             /* CUBIC SPLINE TABLE ELECTROSTATICS */
452             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
453             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
454             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
455             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
456             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
457             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
458             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
459             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
460             velec            = _fjsp_mul_v2r8(qq10,VV);
461             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
462             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
463
464             /* Update potential sum for this i atom from the interaction with this j atom. */
465             velecsum         = _fjsp_add_v2r8(velecsum,velec);
466
467             fscal            = felec;
468
469             /* Update vectorial force */
470             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
471             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
472             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
473             
474             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
475             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
476             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
477
478             /**************************
479              * CALCULATE INTERACTIONS *
480              **************************/
481
482             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
483
484             /* Calculate table index by multiplying r with table scale and truncate to integer */
485             rt               = _fjsp_mul_v2r8(r11,vftabscale);
486             itab_tmp         = _fjsp_dtox_v2r8(rt);
487             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
488             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
489             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
490
491             vfconv.i[0]     *= 12;
492             vfconv.i[1]     *= 12;
493
494             /* CUBIC SPLINE TABLE ELECTROSTATICS */
495             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
496             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
497             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
498             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
499             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
500             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
501             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
502             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
503             velec            = _fjsp_mul_v2r8(qq11,VV);
504             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
505             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
506
507             /* Update potential sum for this i atom from the interaction with this j atom. */
508             velecsum         = _fjsp_add_v2r8(velecsum,velec);
509
510             fscal            = felec;
511
512             /* Update vectorial force */
513             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
514             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
515             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
516             
517             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
518             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
519             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
520
521             /**************************
522              * CALCULATE INTERACTIONS *
523              **************************/
524
525             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
526
527             /* Calculate table index by multiplying r with table scale and truncate to integer */
528             rt               = _fjsp_mul_v2r8(r12,vftabscale);
529             itab_tmp         = _fjsp_dtox_v2r8(rt);
530             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
531             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
532             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
533
534             vfconv.i[0]     *= 12;
535             vfconv.i[1]     *= 12;
536
537             /* CUBIC SPLINE TABLE ELECTROSTATICS */
538             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
539             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
540             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
541             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
542             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
543             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
544             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
545             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
546             velec            = _fjsp_mul_v2r8(qq12,VV);
547             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
548             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
549
550             /* Update potential sum for this i atom from the interaction with this j atom. */
551             velecsum         = _fjsp_add_v2r8(velecsum,velec);
552
553             fscal            = felec;
554
555             /* Update vectorial force */
556             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
557             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
558             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
559             
560             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
561             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
562             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
563
564             /**************************
565              * CALCULATE INTERACTIONS *
566              **************************/
567
568             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
569
570             /* Calculate table index by multiplying r with table scale and truncate to integer */
571             rt               = _fjsp_mul_v2r8(r20,vftabscale);
572             itab_tmp         = _fjsp_dtox_v2r8(rt);
573             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
574             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
575             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
576
577             vfconv.i[0]     *= 12;
578             vfconv.i[1]     *= 12;
579
580             /* CUBIC SPLINE TABLE ELECTROSTATICS */
581             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
582             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
583             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
584             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
585             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
586             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
587             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
588             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
589             velec            = _fjsp_mul_v2r8(qq20,VV);
590             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
591             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
592
593             /* Update potential sum for this i atom from the interaction with this j atom. */
594             velecsum         = _fjsp_add_v2r8(velecsum,velec);
595
596             fscal            = felec;
597
598             /* Update vectorial force */
599             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
600             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
601             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
602             
603             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
604             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
605             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
606
607             /**************************
608              * CALCULATE INTERACTIONS *
609              **************************/
610
611             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
612
613             /* Calculate table index by multiplying r with table scale and truncate to integer */
614             rt               = _fjsp_mul_v2r8(r21,vftabscale);
615             itab_tmp         = _fjsp_dtox_v2r8(rt);
616             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
617             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
618             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
619
620             vfconv.i[0]     *= 12;
621             vfconv.i[1]     *= 12;
622
623             /* CUBIC SPLINE TABLE ELECTROSTATICS */
624             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
625             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
626             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
627             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
628             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
629             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
630             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
631             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
632             velec            = _fjsp_mul_v2r8(qq21,VV);
633             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
634             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
635
636             /* Update potential sum for this i atom from the interaction with this j atom. */
637             velecsum         = _fjsp_add_v2r8(velecsum,velec);
638
639             fscal            = felec;
640
641             /* Update vectorial force */
642             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
643             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
644             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
645             
646             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
647             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
648             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
649
650             /**************************
651              * CALCULATE INTERACTIONS *
652              **************************/
653
654             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
655
656             /* Calculate table index by multiplying r with table scale and truncate to integer */
657             rt               = _fjsp_mul_v2r8(r22,vftabscale);
658             itab_tmp         = _fjsp_dtox_v2r8(rt);
659             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
660             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
661             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
662
663             vfconv.i[0]     *= 12;
664             vfconv.i[1]     *= 12;
665
666             /* CUBIC SPLINE TABLE ELECTROSTATICS */
667             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
668             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
669             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
670             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
671             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
672             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
673             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
674             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
675             velec            = _fjsp_mul_v2r8(qq22,VV);
676             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
677             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
678
679             /* Update potential sum for this i atom from the interaction with this j atom. */
680             velecsum         = _fjsp_add_v2r8(velecsum,velec);
681
682             fscal            = felec;
683
684             /* Update vectorial force */
685             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
686             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
687             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
688             
689             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
690             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
691             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
692
693             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
694
695             /* Inner loop uses 444 flops */
696         }
697
698         if(jidx<j_index_end)
699         {
700
701             jnrA             = jjnr[jidx];
702             j_coord_offsetA  = DIM*jnrA;
703
704             /* load j atom coordinates */
705             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
706                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
707
708             /* Calculate displacement vector */
709             dx00             = _fjsp_sub_v2r8(ix0,jx0);
710             dy00             = _fjsp_sub_v2r8(iy0,jy0);
711             dz00             = _fjsp_sub_v2r8(iz0,jz0);
712             dx01             = _fjsp_sub_v2r8(ix0,jx1);
713             dy01             = _fjsp_sub_v2r8(iy0,jy1);
714             dz01             = _fjsp_sub_v2r8(iz0,jz1);
715             dx02             = _fjsp_sub_v2r8(ix0,jx2);
716             dy02             = _fjsp_sub_v2r8(iy0,jy2);
717             dz02             = _fjsp_sub_v2r8(iz0,jz2);
718             dx10             = _fjsp_sub_v2r8(ix1,jx0);
719             dy10             = _fjsp_sub_v2r8(iy1,jy0);
720             dz10             = _fjsp_sub_v2r8(iz1,jz0);
721             dx11             = _fjsp_sub_v2r8(ix1,jx1);
722             dy11             = _fjsp_sub_v2r8(iy1,jy1);
723             dz11             = _fjsp_sub_v2r8(iz1,jz1);
724             dx12             = _fjsp_sub_v2r8(ix1,jx2);
725             dy12             = _fjsp_sub_v2r8(iy1,jy2);
726             dz12             = _fjsp_sub_v2r8(iz1,jz2);
727             dx20             = _fjsp_sub_v2r8(ix2,jx0);
728             dy20             = _fjsp_sub_v2r8(iy2,jy0);
729             dz20             = _fjsp_sub_v2r8(iz2,jz0);
730             dx21             = _fjsp_sub_v2r8(ix2,jx1);
731             dy21             = _fjsp_sub_v2r8(iy2,jy1);
732             dz21             = _fjsp_sub_v2r8(iz2,jz1);
733             dx22             = _fjsp_sub_v2r8(ix2,jx2);
734             dy22             = _fjsp_sub_v2r8(iy2,jy2);
735             dz22             = _fjsp_sub_v2r8(iz2,jz2);
736
737             /* Calculate squared distance and things based on it */
738             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
739             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
740             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
741             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
742             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
743             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
744             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
745             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
746             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
747
748             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
749             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
750             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
751             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
752             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
753             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
754             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
755             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
756             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
757
758             fjx0             = _fjsp_setzero_v2r8();
759             fjy0             = _fjsp_setzero_v2r8();
760             fjz0             = _fjsp_setzero_v2r8();
761             fjx1             = _fjsp_setzero_v2r8();
762             fjy1             = _fjsp_setzero_v2r8();
763             fjz1             = _fjsp_setzero_v2r8();
764             fjx2             = _fjsp_setzero_v2r8();
765             fjy2             = _fjsp_setzero_v2r8();
766             fjz2             = _fjsp_setzero_v2r8();
767
768             /**************************
769              * CALCULATE INTERACTIONS *
770              **************************/
771
772             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
773
774             /* Calculate table index by multiplying r with table scale and truncate to integer */
775             rt               = _fjsp_mul_v2r8(r00,vftabscale);
776             itab_tmp         = _fjsp_dtox_v2r8(rt);
777             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
778             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
779             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
780
781             vfconv.i[0]     *= 12;
782             vfconv.i[1]     *= 12;
783
784             /* CUBIC SPLINE TABLE ELECTROSTATICS */
785             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
786             F                = _fjsp_setzero_v2r8();
787             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
788             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
789             H                = _fjsp_setzero_v2r8();
790             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
791             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
792             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
793             velec            = _fjsp_mul_v2r8(qq00,VV);
794             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
795             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
796
797             /* CUBIC SPLINE TABLE DISPERSION */
798             vfconv.i[0]       += 4;
799             vfconv.i[1]       += 4;
800             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
801             F                = _fjsp_setzero_v2r8();
802             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
803             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
804             H                = _fjsp_setzero_v2r8();
805             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
806             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
807             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
808             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
809             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
810             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
811
812             /* CUBIC SPLINE TABLE REPULSION */
813             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
814             F                = _fjsp_setzero_v2r8();
815             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
816             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
817             H                = _fjsp_setzero_v2r8();
818             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
819             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
820             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
821             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
822             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
823             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
824             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
825             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
826
827             /* Update potential sum for this i atom from the interaction with this j atom. */
828             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
829             velecsum         = _fjsp_add_v2r8(velecsum,velec);
830             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
831             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
832
833             fscal            = _fjsp_add_v2r8(felec,fvdw);
834
835             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
836
837             /* Update vectorial force */
838             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
839             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
840             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
841             
842             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
843             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
844             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
845
846             /**************************
847              * CALCULATE INTERACTIONS *
848              **************************/
849
850             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
851
852             /* Calculate table index by multiplying r with table scale and truncate to integer */
853             rt               = _fjsp_mul_v2r8(r01,vftabscale);
854             itab_tmp         = _fjsp_dtox_v2r8(rt);
855             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
856             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
857             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
858
859             vfconv.i[0]     *= 12;
860             vfconv.i[1]     *= 12;
861
862             /* CUBIC SPLINE TABLE ELECTROSTATICS */
863             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
864             F                = _fjsp_setzero_v2r8();
865             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
866             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
867             H                = _fjsp_setzero_v2r8();
868             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
869             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
870             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
871             velec            = _fjsp_mul_v2r8(qq01,VV);
872             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
873             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
874
875             /* Update potential sum for this i atom from the interaction with this j atom. */
876             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
877             velecsum         = _fjsp_add_v2r8(velecsum,velec);
878
879             fscal            = felec;
880
881             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
882
883             /* Update vectorial force */
884             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
885             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
886             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
887             
888             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
889             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
890             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
891
892             /**************************
893              * CALCULATE INTERACTIONS *
894              **************************/
895
896             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
897
898             /* Calculate table index by multiplying r with table scale and truncate to integer */
899             rt               = _fjsp_mul_v2r8(r02,vftabscale);
900             itab_tmp         = _fjsp_dtox_v2r8(rt);
901             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
902             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
903             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
904
905             vfconv.i[0]     *= 12;
906             vfconv.i[1]     *= 12;
907
908             /* CUBIC SPLINE TABLE ELECTROSTATICS */
909             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
910             F                = _fjsp_setzero_v2r8();
911             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
912             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
913             H                = _fjsp_setzero_v2r8();
914             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
915             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
916             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
917             velec            = _fjsp_mul_v2r8(qq02,VV);
918             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
919             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
920
921             /* Update potential sum for this i atom from the interaction with this j atom. */
922             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
923             velecsum         = _fjsp_add_v2r8(velecsum,velec);
924
925             fscal            = felec;
926
927             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
928
929             /* Update vectorial force */
930             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
931             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
932             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
933             
934             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
935             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
936             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
937
938             /**************************
939              * CALCULATE INTERACTIONS *
940              **************************/
941
942             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
943
944             /* Calculate table index by multiplying r with table scale and truncate to integer */
945             rt               = _fjsp_mul_v2r8(r10,vftabscale);
946             itab_tmp         = _fjsp_dtox_v2r8(rt);
947             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
948             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
949             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
950
951             vfconv.i[0]     *= 12;
952             vfconv.i[1]     *= 12;
953
954             /* CUBIC SPLINE TABLE ELECTROSTATICS */
955             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
956             F                = _fjsp_setzero_v2r8();
957             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
958             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
959             H                = _fjsp_setzero_v2r8();
960             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
961             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
962             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
963             velec            = _fjsp_mul_v2r8(qq10,VV);
964             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
965             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
966
967             /* Update potential sum for this i atom from the interaction with this j atom. */
968             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
969             velecsum         = _fjsp_add_v2r8(velecsum,velec);
970
971             fscal            = felec;
972
973             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
974
975             /* Update vectorial force */
976             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
977             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
978             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
979             
980             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
981             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
982             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
983
984             /**************************
985              * CALCULATE INTERACTIONS *
986              **************************/
987
988             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
989
990             /* Calculate table index by multiplying r with table scale and truncate to integer */
991             rt               = _fjsp_mul_v2r8(r11,vftabscale);
992             itab_tmp         = _fjsp_dtox_v2r8(rt);
993             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
994             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
995             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
996
997             vfconv.i[0]     *= 12;
998             vfconv.i[1]     *= 12;
999
1000             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1001             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1002             F                = _fjsp_setzero_v2r8();
1003             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1004             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1005             H                = _fjsp_setzero_v2r8();
1006             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1007             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1008             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1009             velec            = _fjsp_mul_v2r8(qq11,VV);
1010             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1011             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1012
1013             /* Update potential sum for this i atom from the interaction with this j atom. */
1014             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1015             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1016
1017             fscal            = felec;
1018
1019             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1020
1021             /* Update vectorial force */
1022             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1023             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1024             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1025             
1026             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1027             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1028             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1029
1030             /**************************
1031              * CALCULATE INTERACTIONS *
1032              **************************/
1033
1034             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1035
1036             /* Calculate table index by multiplying r with table scale and truncate to integer */
1037             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1038             itab_tmp         = _fjsp_dtox_v2r8(rt);
1039             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1040             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1041             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1042
1043             vfconv.i[0]     *= 12;
1044             vfconv.i[1]     *= 12;
1045
1046             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1047             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1048             F                = _fjsp_setzero_v2r8();
1049             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1050             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1051             H                = _fjsp_setzero_v2r8();
1052             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1053             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1054             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1055             velec            = _fjsp_mul_v2r8(qq12,VV);
1056             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1057             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1058
1059             /* Update potential sum for this i atom from the interaction with this j atom. */
1060             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1061             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1062
1063             fscal            = felec;
1064
1065             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1066
1067             /* Update vectorial force */
1068             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1069             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1070             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1071             
1072             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1073             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1074             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1075
1076             /**************************
1077              * CALCULATE INTERACTIONS *
1078              **************************/
1079
1080             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1081
1082             /* Calculate table index by multiplying r with table scale and truncate to integer */
1083             rt               = _fjsp_mul_v2r8(r20,vftabscale);
1084             itab_tmp         = _fjsp_dtox_v2r8(rt);
1085             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1086             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1087             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1088
1089             vfconv.i[0]     *= 12;
1090             vfconv.i[1]     *= 12;
1091
1092             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1093             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1094             F                = _fjsp_setzero_v2r8();
1095             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1096             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1097             H                = _fjsp_setzero_v2r8();
1098             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1099             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1100             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1101             velec            = _fjsp_mul_v2r8(qq20,VV);
1102             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1103             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1104
1105             /* Update potential sum for this i atom from the interaction with this j atom. */
1106             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1107             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1108
1109             fscal            = felec;
1110
1111             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1112
1113             /* Update vectorial force */
1114             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1115             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1116             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1117             
1118             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1119             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1120             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1121
1122             /**************************
1123              * CALCULATE INTERACTIONS *
1124              **************************/
1125
1126             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1127
1128             /* Calculate table index by multiplying r with table scale and truncate to integer */
1129             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1130             itab_tmp         = _fjsp_dtox_v2r8(rt);
1131             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1132             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1133             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1134
1135             vfconv.i[0]     *= 12;
1136             vfconv.i[1]     *= 12;
1137
1138             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1139             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1140             F                = _fjsp_setzero_v2r8();
1141             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1142             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1143             H                = _fjsp_setzero_v2r8();
1144             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1145             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1146             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1147             velec            = _fjsp_mul_v2r8(qq21,VV);
1148             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1149             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1150
1151             /* Update potential sum for this i atom from the interaction with this j atom. */
1152             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1153             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1154
1155             fscal            = felec;
1156
1157             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1158
1159             /* Update vectorial force */
1160             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1161             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1162             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1163             
1164             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1165             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1166             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1167
1168             /**************************
1169              * CALCULATE INTERACTIONS *
1170              **************************/
1171
1172             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1173
1174             /* Calculate table index by multiplying r with table scale and truncate to integer */
1175             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1176             itab_tmp         = _fjsp_dtox_v2r8(rt);
1177             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1178             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1179             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1180
1181             vfconv.i[0]     *= 12;
1182             vfconv.i[1]     *= 12;
1183
1184             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1185             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1186             F                = _fjsp_setzero_v2r8();
1187             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1188             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1189             H                = _fjsp_setzero_v2r8();
1190             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1191             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1192             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1193             velec            = _fjsp_mul_v2r8(qq22,VV);
1194             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1195             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1196
1197             /* Update potential sum for this i atom from the interaction with this j atom. */
1198             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1199             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1200
1201             fscal            = felec;
1202
1203             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1204
1205             /* Update vectorial force */
1206             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1207             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1208             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1209             
1210             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1211             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1212             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1213
1214             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1215
1216             /* Inner loop uses 444 flops */
1217         }
1218
1219         /* End of innermost loop */
1220
1221         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1222                                               f+i_coord_offset,fshift+i_shift_offset);
1223
1224         ggid                        = gid[iidx];
1225         /* Update potential energies */
1226         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1227         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1228
1229         /* Increment number of inner iterations */
1230         inneriter                  += j_index_end - j_index_start;
1231
1232         /* Outer loop uses 20 flops */
1233     }
1234
1235     /* Increment number of outer iterations */
1236     outeriter        += nri;
1237
1238     /* Update outer/inner flops */
1239
1240     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444);
1241 }
1242 /*
1243  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1244  * Electrostatics interaction: CubicSplineTable
1245  * VdW interaction:            CubicSplineTable
1246  * Geometry:                   Water3-Water3
1247  * Calculate force/pot:        Force
1248  */
1249 void
1250 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1251                     (t_nblist                    * gmx_restrict       nlist,
1252                      rvec                        * gmx_restrict          xx,
1253                      rvec                        * gmx_restrict          ff,
1254                      t_forcerec                  * gmx_restrict          fr,
1255                      t_mdatoms                   * gmx_restrict     mdatoms,
1256                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1257                      t_nrnb                      * gmx_restrict        nrnb)
1258 {
1259     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1260      * just 0 for non-waters.
1261      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1262      * jnr indices corresponding to data put in the two positions in the SIMD register.
1263      */
1264     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1265     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1266     int              jnrA,jnrB;
1267     int              j_coord_offsetA,j_coord_offsetB;
1268     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1269     real             rcutoff_scalar;
1270     real             *shiftvec,*fshift,*x,*f;
1271     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1272     int              vdwioffset0;
1273     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1274     int              vdwioffset1;
1275     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1276     int              vdwioffset2;
1277     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1278     int              vdwjidx0A,vdwjidx0B;
1279     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1280     int              vdwjidx1A,vdwjidx1B;
1281     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1282     int              vdwjidx2A,vdwjidx2B;
1283     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1284     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1285     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1286     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1287     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1288     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1289     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1290     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1291     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1292     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1293     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1294     real             *charge;
1295     int              nvdwtype;
1296     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1297     int              *vdwtype;
1298     real             *vdwparam;
1299     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1300     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1301     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1302     real             *vftab;
1303     _fjsp_v2r8       itab_tmp;
1304     _fjsp_v2r8       dummy_mask,cutoff_mask;
1305     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1306     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1307     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1308
1309     x                = xx[0];
1310     f                = ff[0];
1311
1312     nri              = nlist->nri;
1313     iinr             = nlist->iinr;
1314     jindex           = nlist->jindex;
1315     jjnr             = nlist->jjnr;
1316     shiftidx         = nlist->shift;
1317     gid              = nlist->gid;
1318     shiftvec         = fr->shift_vec[0];
1319     fshift           = fr->fshift[0];
1320     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1321     charge           = mdatoms->chargeA;
1322     nvdwtype         = fr->ntype;
1323     vdwparam         = fr->nbfp;
1324     vdwtype          = mdatoms->typeA;
1325
1326     vftab            = kernel_data->table_elec_vdw->data;
1327     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
1328
1329     /* Setup water-specific parameters */
1330     inr              = nlist->iinr[0];
1331     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1332     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1333     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1334     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1335
1336     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1337     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1338     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1339     vdwjidx0A        = 2*vdwtype[inr+0];
1340     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1341     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1342     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1343     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1344     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1345     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1346     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1347     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1348     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1349     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1350     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1351
1352     /* Avoid stupid compiler warnings */
1353     jnrA = jnrB = 0;
1354     j_coord_offsetA = 0;
1355     j_coord_offsetB = 0;
1356
1357     outeriter        = 0;
1358     inneriter        = 0;
1359
1360     /* Start outer loop over neighborlists */
1361     for(iidx=0; iidx<nri; iidx++)
1362     {
1363         /* Load shift vector for this list */
1364         i_shift_offset   = DIM*shiftidx[iidx];
1365
1366         /* Load limits for loop over neighbors */
1367         j_index_start    = jindex[iidx];
1368         j_index_end      = jindex[iidx+1];
1369
1370         /* Get outer coordinate index */
1371         inr              = iinr[iidx];
1372         i_coord_offset   = DIM*inr;
1373
1374         /* Load i particle coords and add shift vector */
1375         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1376                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1377
1378         fix0             = _fjsp_setzero_v2r8();
1379         fiy0             = _fjsp_setzero_v2r8();
1380         fiz0             = _fjsp_setzero_v2r8();
1381         fix1             = _fjsp_setzero_v2r8();
1382         fiy1             = _fjsp_setzero_v2r8();
1383         fiz1             = _fjsp_setzero_v2r8();
1384         fix2             = _fjsp_setzero_v2r8();
1385         fiy2             = _fjsp_setzero_v2r8();
1386         fiz2             = _fjsp_setzero_v2r8();
1387
1388         /* Start inner kernel loop */
1389         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1390         {
1391
1392             /* Get j neighbor index, and coordinate index */
1393             jnrA             = jjnr[jidx];
1394             jnrB             = jjnr[jidx+1];
1395             j_coord_offsetA  = DIM*jnrA;
1396             j_coord_offsetB  = DIM*jnrB;
1397
1398             /* load j atom coordinates */
1399             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1400                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1401
1402             /* Calculate displacement vector */
1403             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1404             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1405             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1406             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1407             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1408             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1409             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1410             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1411             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1412             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1413             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1414             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1415             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1416             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1417             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1418             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1419             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1420             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1421             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1422             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1423             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1424             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1425             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1426             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1427             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1428             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1429             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1430
1431             /* Calculate squared distance and things based on it */
1432             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1433             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1434             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1435             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1436             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1437             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1438             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1439             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1440             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1441
1442             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1443             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1444             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1445             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1446             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1447             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1448             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1449             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1450             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1451
1452             fjx0             = _fjsp_setzero_v2r8();
1453             fjy0             = _fjsp_setzero_v2r8();
1454             fjz0             = _fjsp_setzero_v2r8();
1455             fjx1             = _fjsp_setzero_v2r8();
1456             fjy1             = _fjsp_setzero_v2r8();
1457             fjz1             = _fjsp_setzero_v2r8();
1458             fjx2             = _fjsp_setzero_v2r8();
1459             fjy2             = _fjsp_setzero_v2r8();
1460             fjz2             = _fjsp_setzero_v2r8();
1461
1462             /**************************
1463              * CALCULATE INTERACTIONS *
1464              **************************/
1465
1466             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1467
1468             /* Calculate table index by multiplying r with table scale and truncate to integer */
1469             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1470             itab_tmp         = _fjsp_dtox_v2r8(rt);
1471             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1472             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1473             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1474
1475             vfconv.i[0]     *= 12;
1476             vfconv.i[1]     *= 12;
1477
1478             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1479             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1480             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1481             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1482             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1483             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1484             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1485             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1486             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1487             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1488
1489             /* CUBIC SPLINE TABLE DISPERSION */
1490             vfconv.i[0]       += 4;
1491             vfconv.i[1]       += 4;
1492             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1493             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1494             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1495             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1496             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1497             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1498             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1499             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1500             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1501
1502             /* CUBIC SPLINE TABLE REPULSION */
1503             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1504             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1505             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1506             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1507             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1508             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1509             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1510             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1511             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1512             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1513
1514             fscal            = _fjsp_add_v2r8(felec,fvdw);
1515
1516             /* Update vectorial force */
1517             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1518             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1519             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1520             
1521             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1522             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1523             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1524
1525             /**************************
1526              * CALCULATE INTERACTIONS *
1527              **************************/
1528
1529             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1530
1531             /* Calculate table index by multiplying r with table scale and truncate to integer */
1532             rt               = _fjsp_mul_v2r8(r01,vftabscale);
1533             itab_tmp         = _fjsp_dtox_v2r8(rt);
1534             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1535             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1536             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1537
1538             vfconv.i[0]     *= 12;
1539             vfconv.i[1]     *= 12;
1540
1541             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1542             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1543             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1544             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1545             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1546             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1547             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1548             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1549             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1550             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1551
1552             fscal            = felec;
1553
1554             /* Update vectorial force */
1555             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1556             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1557             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1558             
1559             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1560             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1561             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1562
1563             /**************************
1564              * CALCULATE INTERACTIONS *
1565              **************************/
1566
1567             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1568
1569             /* Calculate table index by multiplying r with table scale and truncate to integer */
1570             rt               = _fjsp_mul_v2r8(r02,vftabscale);
1571             itab_tmp         = _fjsp_dtox_v2r8(rt);
1572             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1573             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1574             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1575
1576             vfconv.i[0]     *= 12;
1577             vfconv.i[1]     *= 12;
1578
1579             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1580             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1581             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1582             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1583             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1584             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1585             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1586             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1587             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1588             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1589
1590             fscal            = felec;
1591
1592             /* Update vectorial force */
1593             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1594             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1595             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1596             
1597             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1598             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1599             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1600
1601             /**************************
1602              * CALCULATE INTERACTIONS *
1603              **************************/
1604
1605             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1606
1607             /* Calculate table index by multiplying r with table scale and truncate to integer */
1608             rt               = _fjsp_mul_v2r8(r10,vftabscale);
1609             itab_tmp         = _fjsp_dtox_v2r8(rt);
1610             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1611             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1612             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1613
1614             vfconv.i[0]     *= 12;
1615             vfconv.i[1]     *= 12;
1616
1617             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1618             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1619             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1620             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1621             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1622             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1623             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1624             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1625             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1626             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1627
1628             fscal            = felec;
1629
1630             /* Update vectorial force */
1631             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1632             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1633             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1634             
1635             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1636             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1637             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1638
1639             /**************************
1640              * CALCULATE INTERACTIONS *
1641              **************************/
1642
1643             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1644
1645             /* Calculate table index by multiplying r with table scale and truncate to integer */
1646             rt               = _fjsp_mul_v2r8(r11,vftabscale);
1647             itab_tmp         = _fjsp_dtox_v2r8(rt);
1648             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1649             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1650             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1651
1652             vfconv.i[0]     *= 12;
1653             vfconv.i[1]     *= 12;
1654
1655             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1656             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1657             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1658             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1659             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1660             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1661             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1662             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1663             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1664             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1665
1666             fscal            = felec;
1667
1668             /* Update vectorial force */
1669             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1670             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1671             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1672             
1673             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1674             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1675             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1676
1677             /**************************
1678              * CALCULATE INTERACTIONS *
1679              **************************/
1680
1681             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1682
1683             /* Calculate table index by multiplying r with table scale and truncate to integer */
1684             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1685             itab_tmp         = _fjsp_dtox_v2r8(rt);
1686             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1687             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1688             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1689
1690             vfconv.i[0]     *= 12;
1691             vfconv.i[1]     *= 12;
1692
1693             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1694             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1695             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1696             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1697             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1698             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1699             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1700             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1701             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1702             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1703
1704             fscal            = felec;
1705
1706             /* Update vectorial force */
1707             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1708             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1709             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1710             
1711             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1712             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1713             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1714
1715             /**************************
1716              * CALCULATE INTERACTIONS *
1717              **************************/
1718
1719             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1720
1721             /* Calculate table index by multiplying r with table scale and truncate to integer */
1722             rt               = _fjsp_mul_v2r8(r20,vftabscale);
1723             itab_tmp         = _fjsp_dtox_v2r8(rt);
1724             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1725             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1726             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1727
1728             vfconv.i[0]     *= 12;
1729             vfconv.i[1]     *= 12;
1730
1731             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1732             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1733             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1734             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1735             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1736             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1737             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1738             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1739             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1740             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1741
1742             fscal            = felec;
1743
1744             /* Update vectorial force */
1745             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1746             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1747             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1748             
1749             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1750             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1751             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1752
1753             /**************************
1754              * CALCULATE INTERACTIONS *
1755              **************************/
1756
1757             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1758
1759             /* Calculate table index by multiplying r with table scale and truncate to integer */
1760             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1761             itab_tmp         = _fjsp_dtox_v2r8(rt);
1762             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1763             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1764             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1765
1766             vfconv.i[0]     *= 12;
1767             vfconv.i[1]     *= 12;
1768
1769             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1770             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1771             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1772             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1773             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1774             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1775             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1776             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1777             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1778             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1779
1780             fscal            = felec;
1781
1782             /* Update vectorial force */
1783             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1784             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1785             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1786             
1787             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1788             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1789             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1790
1791             /**************************
1792              * CALCULATE INTERACTIONS *
1793              **************************/
1794
1795             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1796
1797             /* Calculate table index by multiplying r with table scale and truncate to integer */
1798             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1799             itab_tmp         = _fjsp_dtox_v2r8(rt);
1800             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1801             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1802             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1803
1804             vfconv.i[0]     *= 12;
1805             vfconv.i[1]     *= 12;
1806
1807             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1808             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1809             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1810             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1811             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1812             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1813             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1814             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1815             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1816             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1817
1818             fscal            = felec;
1819
1820             /* Update vectorial force */
1821             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1822             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1823             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1824             
1825             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1826             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1827             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1828
1829             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1830
1831             /* Inner loop uses 400 flops */
1832         }
1833
1834         if(jidx<j_index_end)
1835         {
1836
1837             jnrA             = jjnr[jidx];
1838             j_coord_offsetA  = DIM*jnrA;
1839
1840             /* load j atom coordinates */
1841             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1842                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1843
1844             /* Calculate displacement vector */
1845             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1846             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1847             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1848             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1849             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1850             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1851             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1852             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1853             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1854             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1855             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1856             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1857             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1858             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1859             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1860             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1861             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1862             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1863             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1864             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1865             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1866             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1867             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1868             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1869             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1870             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1871             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1872
1873             /* Calculate squared distance and things based on it */
1874             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1875             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1876             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1877             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1878             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1879             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1880             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1881             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1882             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1883
1884             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1885             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1886             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1887             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1888             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1889             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1890             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1891             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1892             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1893
1894             fjx0             = _fjsp_setzero_v2r8();
1895             fjy0             = _fjsp_setzero_v2r8();
1896             fjz0             = _fjsp_setzero_v2r8();
1897             fjx1             = _fjsp_setzero_v2r8();
1898             fjy1             = _fjsp_setzero_v2r8();
1899             fjz1             = _fjsp_setzero_v2r8();
1900             fjx2             = _fjsp_setzero_v2r8();
1901             fjy2             = _fjsp_setzero_v2r8();
1902             fjz2             = _fjsp_setzero_v2r8();
1903
1904             /**************************
1905              * CALCULATE INTERACTIONS *
1906              **************************/
1907
1908             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1909
1910             /* Calculate table index by multiplying r with table scale and truncate to integer */
1911             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1912             itab_tmp         = _fjsp_dtox_v2r8(rt);
1913             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1914             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1915             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1916
1917             vfconv.i[0]     *= 12;
1918             vfconv.i[1]     *= 12;
1919
1920             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1921             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1922             F                = _fjsp_setzero_v2r8();
1923             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1924             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1925             H                = _fjsp_setzero_v2r8();
1926             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1927             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1928             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1929             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1930
1931             /* CUBIC SPLINE TABLE DISPERSION */
1932             vfconv.i[0]       += 4;
1933             vfconv.i[1]       += 4;
1934             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1935             F                = _fjsp_setzero_v2r8();
1936             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1937             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1938             H                = _fjsp_setzero_v2r8();
1939             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1940             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1941             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1942             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1943
1944             /* CUBIC SPLINE TABLE REPULSION */
1945             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1946             F                = _fjsp_setzero_v2r8();
1947             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1948             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1949             H                = _fjsp_setzero_v2r8();
1950             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1951             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1952             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1953             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1954             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1955
1956             fscal            = _fjsp_add_v2r8(felec,fvdw);
1957
1958             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1959
1960             /* Update vectorial force */
1961             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1962             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1963             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1964             
1965             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1966             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1967             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1968
1969             /**************************
1970              * CALCULATE INTERACTIONS *
1971              **************************/
1972
1973             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1974
1975             /* Calculate table index by multiplying r with table scale and truncate to integer */
1976             rt               = _fjsp_mul_v2r8(r01,vftabscale);
1977             itab_tmp         = _fjsp_dtox_v2r8(rt);
1978             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1979             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1980             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1981
1982             vfconv.i[0]     *= 12;
1983             vfconv.i[1]     *= 12;
1984
1985             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1986             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1987             F                = _fjsp_setzero_v2r8();
1988             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1989             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1990             H                = _fjsp_setzero_v2r8();
1991             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1992             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1993             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1994             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1995
1996             fscal            = felec;
1997
1998             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1999
2000             /* Update vectorial force */
2001             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
2002             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2003             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2004             
2005             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2006             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2007             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2008
2009             /**************************
2010              * CALCULATE INTERACTIONS *
2011              **************************/
2012
2013             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
2014
2015             /* Calculate table index by multiplying r with table scale and truncate to integer */
2016             rt               = _fjsp_mul_v2r8(r02,vftabscale);
2017             itab_tmp         = _fjsp_dtox_v2r8(rt);
2018             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2019             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2020             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2021
2022             vfconv.i[0]     *= 12;
2023             vfconv.i[1]     *= 12;
2024
2025             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2026             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2027             F                = _fjsp_setzero_v2r8();
2028             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2029             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2030             H                = _fjsp_setzero_v2r8();
2031             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2032             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2033             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2034             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
2035
2036             fscal            = felec;
2037
2038             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2039
2040             /* Update vectorial force */
2041             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
2042             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2043             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2044             
2045             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2046             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2047             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2048
2049             /**************************
2050              * CALCULATE INTERACTIONS *
2051              **************************/
2052
2053             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
2054
2055             /* Calculate table index by multiplying r with table scale and truncate to integer */
2056             rt               = _fjsp_mul_v2r8(r10,vftabscale);
2057             itab_tmp         = _fjsp_dtox_v2r8(rt);
2058             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2059             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2060             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2061
2062             vfconv.i[0]     *= 12;
2063             vfconv.i[1]     *= 12;
2064
2065             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2066             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2067             F                = _fjsp_setzero_v2r8();
2068             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2069             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2070             H                = _fjsp_setzero_v2r8();
2071             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2072             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2073             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2074             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
2075
2076             fscal            = felec;
2077
2078             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2079
2080             /* Update vectorial force */
2081             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
2082             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2083             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2084             
2085             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2086             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2087             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2088
2089             /**************************
2090              * CALCULATE INTERACTIONS *
2091              **************************/
2092
2093             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
2094
2095             /* Calculate table index by multiplying r with table scale and truncate to integer */
2096             rt               = _fjsp_mul_v2r8(r11,vftabscale);
2097             itab_tmp         = _fjsp_dtox_v2r8(rt);
2098             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2099             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2100             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2101
2102             vfconv.i[0]     *= 12;
2103             vfconv.i[1]     *= 12;
2104
2105             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2106             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2107             F                = _fjsp_setzero_v2r8();
2108             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2109             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2110             H                = _fjsp_setzero_v2r8();
2111             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2112             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2113             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2114             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2115
2116             fscal            = felec;
2117
2118             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2119
2120             /* Update vectorial force */
2121             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
2122             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2123             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2124             
2125             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2126             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2127             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2128
2129             /**************************
2130              * CALCULATE INTERACTIONS *
2131              **************************/
2132
2133             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2134
2135             /* Calculate table index by multiplying r with table scale and truncate to integer */
2136             rt               = _fjsp_mul_v2r8(r12,vftabscale);
2137             itab_tmp         = _fjsp_dtox_v2r8(rt);
2138             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2139             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2140             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2141
2142             vfconv.i[0]     *= 12;
2143             vfconv.i[1]     *= 12;
2144
2145             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2146             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2147             F                = _fjsp_setzero_v2r8();
2148             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2149             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2150             H                = _fjsp_setzero_v2r8();
2151             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2152             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2153             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2154             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2155
2156             fscal            = felec;
2157
2158             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2159
2160             /* Update vectorial force */
2161             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2162             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2163             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2164             
2165             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2166             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2167             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2168
2169             /**************************
2170              * CALCULATE INTERACTIONS *
2171              **************************/
2172
2173             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
2174
2175             /* Calculate table index by multiplying r with table scale and truncate to integer */
2176             rt               = _fjsp_mul_v2r8(r20,vftabscale);
2177             itab_tmp         = _fjsp_dtox_v2r8(rt);
2178             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2179             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2180             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2181
2182             vfconv.i[0]     *= 12;
2183             vfconv.i[1]     *= 12;
2184
2185             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2186             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2187             F                = _fjsp_setzero_v2r8();
2188             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2189             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2190             H                = _fjsp_setzero_v2r8();
2191             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2192             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2193             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2194             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2195
2196             fscal            = felec;
2197
2198             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2199
2200             /* Update vectorial force */
2201             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
2202             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2203             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2204             
2205             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2206             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2207             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2208
2209             /**************************
2210              * CALCULATE INTERACTIONS *
2211              **************************/
2212
2213             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2214
2215             /* Calculate table index by multiplying r with table scale and truncate to integer */
2216             rt               = _fjsp_mul_v2r8(r21,vftabscale);
2217             itab_tmp         = _fjsp_dtox_v2r8(rt);
2218             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2219             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2220             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2221
2222             vfconv.i[0]     *= 12;
2223             vfconv.i[1]     *= 12;
2224
2225             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2226             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2227             F                = _fjsp_setzero_v2r8();
2228             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2229             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2230             H                = _fjsp_setzero_v2r8();
2231             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2232             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2233             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2234             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2235
2236             fscal            = felec;
2237
2238             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2239
2240             /* Update vectorial force */
2241             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2242             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2243             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2244             
2245             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2246             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2247             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2248
2249             /**************************
2250              * CALCULATE INTERACTIONS *
2251              **************************/
2252
2253             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2254
2255             /* Calculate table index by multiplying r with table scale and truncate to integer */
2256             rt               = _fjsp_mul_v2r8(r22,vftabscale);
2257             itab_tmp         = _fjsp_dtox_v2r8(rt);
2258             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2259             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2260             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2261
2262             vfconv.i[0]     *= 12;
2263             vfconv.i[1]     *= 12;
2264
2265             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2266             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2267             F                = _fjsp_setzero_v2r8();
2268             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2269             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2270             H                = _fjsp_setzero_v2r8();
2271             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2272             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2273             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2274             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2275
2276             fscal            = felec;
2277
2278             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2279
2280             /* Update vectorial force */
2281             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2282             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2283             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2284             
2285             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2286             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2287             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2288
2289             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2290
2291             /* Inner loop uses 400 flops */
2292         }
2293
2294         /* End of innermost loop */
2295
2296         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2297                                               f+i_coord_offset,fshift+i_shift_offset);
2298
2299         /* Increment number of inner iterations */
2300         inneriter                  += j_index_end - j_index_start;
2301
2302         /* Outer loop uses 18 flops */
2303     }
2304
2305     /* Increment number of outer iterations */
2306     outeriter        += nri;
2307
2308     /* Update outer/inner flops */
2309
2310     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);
2311 }