Use full path for legacyheaders
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: CubicSplineTable
52  * VdW interaction:            CubicSplineTable
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     int              nvdwtype;
103     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
104     int              *vdwtype;
105     real             *vdwparam;
106     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
107     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
109     real             *vftab;
110     _fjsp_v2r8       itab_tmp;
111     _fjsp_v2r8       dummy_mask,cutoff_mask;
112     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
113     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
114     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115
116     x                = xx[0];
117     f                = ff[0];
118
119     nri              = nlist->nri;
120     iinr             = nlist->iinr;
121     jindex           = nlist->jindex;
122     jjnr             = nlist->jjnr;
123     shiftidx         = nlist->shift;
124     gid              = nlist->gid;
125     shiftvec         = fr->shift_vec[0];
126     fshift           = fr->fshift[0];
127     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
128     charge           = mdatoms->chargeA;
129     nvdwtype         = fr->ntype;
130     vdwparam         = fr->nbfp;
131     vdwtype          = mdatoms->typeA;
132
133     vftab            = kernel_data->table_elec_vdw->data;
134     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
135
136     /* Setup water-specific parameters */
137     inr              = nlist->iinr[0];
138     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
139     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
140     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
141     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
142
143     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
144     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
145     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
146     vdwjidx0A        = 2*vdwtype[inr+0];
147     qq00             = _fjsp_mul_v2r8(iq0,jq0);
148     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
149     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
150     qq01             = _fjsp_mul_v2r8(iq0,jq1);
151     qq02             = _fjsp_mul_v2r8(iq0,jq2);
152     qq10             = _fjsp_mul_v2r8(iq1,jq0);
153     qq11             = _fjsp_mul_v2r8(iq1,jq1);
154     qq12             = _fjsp_mul_v2r8(iq1,jq2);
155     qq20             = _fjsp_mul_v2r8(iq2,jq0);
156     qq21             = _fjsp_mul_v2r8(iq2,jq1);
157     qq22             = _fjsp_mul_v2r8(iq2,jq2);
158
159     /* Avoid stupid compiler warnings */
160     jnrA = jnrB = 0;
161     j_coord_offsetA = 0;
162     j_coord_offsetB = 0;
163
164     outeriter        = 0;
165     inneriter        = 0;
166
167     /* Start outer loop over neighborlists */
168     for(iidx=0; iidx<nri; iidx++)
169     {
170         /* Load shift vector for this list */
171         i_shift_offset   = DIM*shiftidx[iidx];
172
173         /* Load limits for loop over neighbors */
174         j_index_start    = jindex[iidx];
175         j_index_end      = jindex[iidx+1];
176
177         /* Get outer coordinate index */
178         inr              = iinr[iidx];
179         i_coord_offset   = DIM*inr;
180
181         /* Load i particle coords and add shift vector */
182         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
183                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
184
185         fix0             = _fjsp_setzero_v2r8();
186         fiy0             = _fjsp_setzero_v2r8();
187         fiz0             = _fjsp_setzero_v2r8();
188         fix1             = _fjsp_setzero_v2r8();
189         fiy1             = _fjsp_setzero_v2r8();
190         fiz1             = _fjsp_setzero_v2r8();
191         fix2             = _fjsp_setzero_v2r8();
192         fiy2             = _fjsp_setzero_v2r8();
193         fiz2             = _fjsp_setzero_v2r8();
194
195         /* Reset potential sums */
196         velecsum         = _fjsp_setzero_v2r8();
197         vvdwsum          = _fjsp_setzero_v2r8();
198
199         /* Start inner kernel loop */
200         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
201         {
202
203             /* Get j neighbor index, and coordinate index */
204             jnrA             = jjnr[jidx];
205             jnrB             = jjnr[jidx+1];
206             j_coord_offsetA  = DIM*jnrA;
207             j_coord_offsetB  = DIM*jnrB;
208
209             /* load j atom coordinates */
210             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
211                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
212
213             /* Calculate displacement vector */
214             dx00             = _fjsp_sub_v2r8(ix0,jx0);
215             dy00             = _fjsp_sub_v2r8(iy0,jy0);
216             dz00             = _fjsp_sub_v2r8(iz0,jz0);
217             dx01             = _fjsp_sub_v2r8(ix0,jx1);
218             dy01             = _fjsp_sub_v2r8(iy0,jy1);
219             dz01             = _fjsp_sub_v2r8(iz0,jz1);
220             dx02             = _fjsp_sub_v2r8(ix0,jx2);
221             dy02             = _fjsp_sub_v2r8(iy0,jy2);
222             dz02             = _fjsp_sub_v2r8(iz0,jz2);
223             dx10             = _fjsp_sub_v2r8(ix1,jx0);
224             dy10             = _fjsp_sub_v2r8(iy1,jy0);
225             dz10             = _fjsp_sub_v2r8(iz1,jz0);
226             dx11             = _fjsp_sub_v2r8(ix1,jx1);
227             dy11             = _fjsp_sub_v2r8(iy1,jy1);
228             dz11             = _fjsp_sub_v2r8(iz1,jz1);
229             dx12             = _fjsp_sub_v2r8(ix1,jx2);
230             dy12             = _fjsp_sub_v2r8(iy1,jy2);
231             dz12             = _fjsp_sub_v2r8(iz1,jz2);
232             dx20             = _fjsp_sub_v2r8(ix2,jx0);
233             dy20             = _fjsp_sub_v2r8(iy2,jy0);
234             dz20             = _fjsp_sub_v2r8(iz2,jz0);
235             dx21             = _fjsp_sub_v2r8(ix2,jx1);
236             dy21             = _fjsp_sub_v2r8(iy2,jy1);
237             dz21             = _fjsp_sub_v2r8(iz2,jz1);
238             dx22             = _fjsp_sub_v2r8(ix2,jx2);
239             dy22             = _fjsp_sub_v2r8(iy2,jy2);
240             dz22             = _fjsp_sub_v2r8(iz2,jz2);
241
242             /* Calculate squared distance and things based on it */
243             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
244             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
245             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
246             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
247             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
248             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
249             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
250             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
251             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
252
253             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
254             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
255             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
256             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
257             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
258             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
259             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
260             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
261             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
262
263             fjx0             = _fjsp_setzero_v2r8();
264             fjy0             = _fjsp_setzero_v2r8();
265             fjz0             = _fjsp_setzero_v2r8();
266             fjx1             = _fjsp_setzero_v2r8();
267             fjy1             = _fjsp_setzero_v2r8();
268             fjz1             = _fjsp_setzero_v2r8();
269             fjx2             = _fjsp_setzero_v2r8();
270             fjy2             = _fjsp_setzero_v2r8();
271             fjz2             = _fjsp_setzero_v2r8();
272
273             /**************************
274              * CALCULATE INTERACTIONS *
275              **************************/
276
277             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
278
279             /* Calculate table index by multiplying r with table scale and truncate to integer */
280             rt               = _fjsp_mul_v2r8(r00,vftabscale);
281             itab_tmp         = _fjsp_dtox_v2r8(rt);
282             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
283             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
284             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
285
286             vfconv.i[0]     *= 12;
287             vfconv.i[1]     *= 12;
288
289             /* CUBIC SPLINE TABLE ELECTROSTATICS */
290             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
291             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
292             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
293             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
294             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
295             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
296             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
297             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
298             velec            = _fjsp_mul_v2r8(qq00,VV);
299             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
300             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
301
302             /* CUBIC SPLINE TABLE DISPERSION */
303             vfconv.i[0]       += 4;
304             vfconv.i[1]       += 4;
305             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
306             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
307             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
308             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
309             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
310             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
311             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
312             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
313             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
314             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
315             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
316
317             /* CUBIC SPLINE TABLE REPULSION */
318             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
319             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
320             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
321             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
322             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
323             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
324             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
325             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
326             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
327             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
328             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
329             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
330             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
331
332             /* Update potential sum for this i atom from the interaction with this j atom. */
333             velecsum         = _fjsp_add_v2r8(velecsum,velec);
334             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
335
336             fscal            = _fjsp_add_v2r8(felec,fvdw);
337
338             /* Update vectorial force */
339             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
340             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
341             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
342             
343             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
344             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
345             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
346
347             /**************************
348              * CALCULATE INTERACTIONS *
349              **************************/
350
351             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
352
353             /* Calculate table index by multiplying r with table scale and truncate to integer */
354             rt               = _fjsp_mul_v2r8(r01,vftabscale);
355             itab_tmp         = _fjsp_dtox_v2r8(rt);
356             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
357             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
358             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
359
360             vfconv.i[0]     *= 12;
361             vfconv.i[1]     *= 12;
362
363             /* CUBIC SPLINE TABLE ELECTROSTATICS */
364             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
365             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
366             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
367             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
368             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
369             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
370             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
371             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
372             velec            = _fjsp_mul_v2r8(qq01,VV);
373             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
374             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
375
376             /* Update potential sum for this i atom from the interaction with this j atom. */
377             velecsum         = _fjsp_add_v2r8(velecsum,velec);
378
379             fscal            = felec;
380
381             /* Update vectorial force */
382             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
383             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
384             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
385             
386             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
387             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
388             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
389
390             /**************************
391              * CALCULATE INTERACTIONS *
392              **************************/
393
394             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
395
396             /* Calculate table index by multiplying r with table scale and truncate to integer */
397             rt               = _fjsp_mul_v2r8(r02,vftabscale);
398             itab_tmp         = _fjsp_dtox_v2r8(rt);
399             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
400             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
401             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
402
403             vfconv.i[0]     *= 12;
404             vfconv.i[1]     *= 12;
405
406             /* CUBIC SPLINE TABLE ELECTROSTATICS */
407             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
408             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
409             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
410             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
411             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
412             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
413             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
414             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
415             velec            = _fjsp_mul_v2r8(qq02,VV);
416             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
417             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
418
419             /* Update potential sum for this i atom from the interaction with this j atom. */
420             velecsum         = _fjsp_add_v2r8(velecsum,velec);
421
422             fscal            = felec;
423
424             /* Update vectorial force */
425             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
426             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
427             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
428             
429             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
430             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
431             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
432
433             /**************************
434              * CALCULATE INTERACTIONS *
435              **************************/
436
437             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
438
439             /* Calculate table index by multiplying r with table scale and truncate to integer */
440             rt               = _fjsp_mul_v2r8(r10,vftabscale);
441             itab_tmp         = _fjsp_dtox_v2r8(rt);
442             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
443             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
444             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
445
446             vfconv.i[0]     *= 12;
447             vfconv.i[1]     *= 12;
448
449             /* CUBIC SPLINE TABLE ELECTROSTATICS */
450             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
451             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
452             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
453             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
454             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
455             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
456             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
457             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
458             velec            = _fjsp_mul_v2r8(qq10,VV);
459             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
460             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
461
462             /* Update potential sum for this i atom from the interaction with this j atom. */
463             velecsum         = _fjsp_add_v2r8(velecsum,velec);
464
465             fscal            = felec;
466
467             /* Update vectorial force */
468             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
469             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
470             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
471             
472             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
473             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
474             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
475
476             /**************************
477              * CALCULATE INTERACTIONS *
478              **************************/
479
480             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
481
482             /* Calculate table index by multiplying r with table scale and truncate to integer */
483             rt               = _fjsp_mul_v2r8(r11,vftabscale);
484             itab_tmp         = _fjsp_dtox_v2r8(rt);
485             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
486             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
487             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
488
489             vfconv.i[0]     *= 12;
490             vfconv.i[1]     *= 12;
491
492             /* CUBIC SPLINE TABLE ELECTROSTATICS */
493             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
494             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
495             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
496             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
497             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
498             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
499             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
500             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
501             velec            = _fjsp_mul_v2r8(qq11,VV);
502             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
503             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
504
505             /* Update potential sum for this i atom from the interaction with this j atom. */
506             velecsum         = _fjsp_add_v2r8(velecsum,velec);
507
508             fscal            = felec;
509
510             /* Update vectorial force */
511             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
512             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
513             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
514             
515             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
516             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
517             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
518
519             /**************************
520              * CALCULATE INTERACTIONS *
521              **************************/
522
523             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
524
525             /* Calculate table index by multiplying r with table scale and truncate to integer */
526             rt               = _fjsp_mul_v2r8(r12,vftabscale);
527             itab_tmp         = _fjsp_dtox_v2r8(rt);
528             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
529             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
530             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
531
532             vfconv.i[0]     *= 12;
533             vfconv.i[1]     *= 12;
534
535             /* CUBIC SPLINE TABLE ELECTROSTATICS */
536             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
537             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
538             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
539             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
540             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
541             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
542             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
543             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
544             velec            = _fjsp_mul_v2r8(qq12,VV);
545             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
546             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
547
548             /* Update potential sum for this i atom from the interaction with this j atom. */
549             velecsum         = _fjsp_add_v2r8(velecsum,velec);
550
551             fscal            = felec;
552
553             /* Update vectorial force */
554             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
555             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
556             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
557             
558             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
559             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
560             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
561
562             /**************************
563              * CALCULATE INTERACTIONS *
564              **************************/
565
566             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
567
568             /* Calculate table index by multiplying r with table scale and truncate to integer */
569             rt               = _fjsp_mul_v2r8(r20,vftabscale);
570             itab_tmp         = _fjsp_dtox_v2r8(rt);
571             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
572             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
573             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
574
575             vfconv.i[0]     *= 12;
576             vfconv.i[1]     *= 12;
577
578             /* CUBIC SPLINE TABLE ELECTROSTATICS */
579             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
580             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
581             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
582             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
583             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
584             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
585             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
586             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
587             velec            = _fjsp_mul_v2r8(qq20,VV);
588             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
589             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
590
591             /* Update potential sum for this i atom from the interaction with this j atom. */
592             velecsum         = _fjsp_add_v2r8(velecsum,velec);
593
594             fscal            = felec;
595
596             /* Update vectorial force */
597             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
598             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
599             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
600             
601             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
602             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
603             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
604
605             /**************************
606              * CALCULATE INTERACTIONS *
607              **************************/
608
609             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
610
611             /* Calculate table index by multiplying r with table scale and truncate to integer */
612             rt               = _fjsp_mul_v2r8(r21,vftabscale);
613             itab_tmp         = _fjsp_dtox_v2r8(rt);
614             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
615             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
616             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
617
618             vfconv.i[0]     *= 12;
619             vfconv.i[1]     *= 12;
620
621             /* CUBIC SPLINE TABLE ELECTROSTATICS */
622             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
623             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
624             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
625             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
626             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
627             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
628             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
629             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
630             velec            = _fjsp_mul_v2r8(qq21,VV);
631             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
632             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
633
634             /* Update potential sum for this i atom from the interaction with this j atom. */
635             velecsum         = _fjsp_add_v2r8(velecsum,velec);
636
637             fscal            = felec;
638
639             /* Update vectorial force */
640             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
641             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
642             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
643             
644             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
645             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
646             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
647
648             /**************************
649              * CALCULATE INTERACTIONS *
650              **************************/
651
652             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
653
654             /* Calculate table index by multiplying r with table scale and truncate to integer */
655             rt               = _fjsp_mul_v2r8(r22,vftabscale);
656             itab_tmp         = _fjsp_dtox_v2r8(rt);
657             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
658             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
659             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
660
661             vfconv.i[0]     *= 12;
662             vfconv.i[1]     *= 12;
663
664             /* CUBIC SPLINE TABLE ELECTROSTATICS */
665             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
666             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
667             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
668             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
669             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
670             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
671             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
672             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
673             velec            = _fjsp_mul_v2r8(qq22,VV);
674             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
675             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
676
677             /* Update potential sum for this i atom from the interaction with this j atom. */
678             velecsum         = _fjsp_add_v2r8(velecsum,velec);
679
680             fscal            = felec;
681
682             /* Update vectorial force */
683             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
684             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
685             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
686             
687             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
688             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
689             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
690
691             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
692
693             /* Inner loop uses 444 flops */
694         }
695
696         if(jidx<j_index_end)
697         {
698
699             jnrA             = jjnr[jidx];
700             j_coord_offsetA  = DIM*jnrA;
701
702             /* load j atom coordinates */
703             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
704                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
705
706             /* Calculate displacement vector */
707             dx00             = _fjsp_sub_v2r8(ix0,jx0);
708             dy00             = _fjsp_sub_v2r8(iy0,jy0);
709             dz00             = _fjsp_sub_v2r8(iz0,jz0);
710             dx01             = _fjsp_sub_v2r8(ix0,jx1);
711             dy01             = _fjsp_sub_v2r8(iy0,jy1);
712             dz01             = _fjsp_sub_v2r8(iz0,jz1);
713             dx02             = _fjsp_sub_v2r8(ix0,jx2);
714             dy02             = _fjsp_sub_v2r8(iy0,jy2);
715             dz02             = _fjsp_sub_v2r8(iz0,jz2);
716             dx10             = _fjsp_sub_v2r8(ix1,jx0);
717             dy10             = _fjsp_sub_v2r8(iy1,jy0);
718             dz10             = _fjsp_sub_v2r8(iz1,jz0);
719             dx11             = _fjsp_sub_v2r8(ix1,jx1);
720             dy11             = _fjsp_sub_v2r8(iy1,jy1);
721             dz11             = _fjsp_sub_v2r8(iz1,jz1);
722             dx12             = _fjsp_sub_v2r8(ix1,jx2);
723             dy12             = _fjsp_sub_v2r8(iy1,jy2);
724             dz12             = _fjsp_sub_v2r8(iz1,jz2);
725             dx20             = _fjsp_sub_v2r8(ix2,jx0);
726             dy20             = _fjsp_sub_v2r8(iy2,jy0);
727             dz20             = _fjsp_sub_v2r8(iz2,jz0);
728             dx21             = _fjsp_sub_v2r8(ix2,jx1);
729             dy21             = _fjsp_sub_v2r8(iy2,jy1);
730             dz21             = _fjsp_sub_v2r8(iz2,jz1);
731             dx22             = _fjsp_sub_v2r8(ix2,jx2);
732             dy22             = _fjsp_sub_v2r8(iy2,jy2);
733             dz22             = _fjsp_sub_v2r8(iz2,jz2);
734
735             /* Calculate squared distance and things based on it */
736             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
737             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
738             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
739             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
740             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
741             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
742             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
743             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
744             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
745
746             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
747             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
748             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
749             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
750             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
751             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
752             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
753             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
754             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
755
756             fjx0             = _fjsp_setzero_v2r8();
757             fjy0             = _fjsp_setzero_v2r8();
758             fjz0             = _fjsp_setzero_v2r8();
759             fjx1             = _fjsp_setzero_v2r8();
760             fjy1             = _fjsp_setzero_v2r8();
761             fjz1             = _fjsp_setzero_v2r8();
762             fjx2             = _fjsp_setzero_v2r8();
763             fjy2             = _fjsp_setzero_v2r8();
764             fjz2             = _fjsp_setzero_v2r8();
765
766             /**************************
767              * CALCULATE INTERACTIONS *
768              **************************/
769
770             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
771
772             /* Calculate table index by multiplying r with table scale and truncate to integer */
773             rt               = _fjsp_mul_v2r8(r00,vftabscale);
774             itab_tmp         = _fjsp_dtox_v2r8(rt);
775             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
776             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
777             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
778
779             vfconv.i[0]     *= 12;
780             vfconv.i[1]     *= 12;
781
782             /* CUBIC SPLINE TABLE ELECTROSTATICS */
783             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
784             F                = _fjsp_setzero_v2r8();
785             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
786             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
787             H                = _fjsp_setzero_v2r8();
788             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
789             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
790             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
791             velec            = _fjsp_mul_v2r8(qq00,VV);
792             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
793             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
794
795             /* CUBIC SPLINE TABLE DISPERSION */
796             vfconv.i[0]       += 4;
797             vfconv.i[1]       += 4;
798             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
799             F                = _fjsp_setzero_v2r8();
800             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
801             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
802             H                = _fjsp_setzero_v2r8();
803             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
804             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
805             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
806             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
807             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
808             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
809
810             /* CUBIC SPLINE TABLE REPULSION */
811             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
812             F                = _fjsp_setzero_v2r8();
813             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
814             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
815             H                = _fjsp_setzero_v2r8();
816             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
817             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
818             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
819             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
820             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
821             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
822             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
823             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
824
825             /* Update potential sum for this i atom from the interaction with this j atom. */
826             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
827             velecsum         = _fjsp_add_v2r8(velecsum,velec);
828             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
829             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
830
831             fscal            = _fjsp_add_v2r8(felec,fvdw);
832
833             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
834
835             /* Update vectorial force */
836             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
837             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
838             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
839             
840             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
841             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
842             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
843
844             /**************************
845              * CALCULATE INTERACTIONS *
846              **************************/
847
848             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
849
850             /* Calculate table index by multiplying r with table scale and truncate to integer */
851             rt               = _fjsp_mul_v2r8(r01,vftabscale);
852             itab_tmp         = _fjsp_dtox_v2r8(rt);
853             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
854             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
855             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
856
857             vfconv.i[0]     *= 12;
858             vfconv.i[1]     *= 12;
859
860             /* CUBIC SPLINE TABLE ELECTROSTATICS */
861             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
862             F                = _fjsp_setzero_v2r8();
863             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
864             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
865             H                = _fjsp_setzero_v2r8();
866             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
867             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
868             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
869             velec            = _fjsp_mul_v2r8(qq01,VV);
870             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
871             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
872
873             /* Update potential sum for this i atom from the interaction with this j atom. */
874             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
875             velecsum         = _fjsp_add_v2r8(velecsum,velec);
876
877             fscal            = felec;
878
879             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
880
881             /* Update vectorial force */
882             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
883             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
884             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
885             
886             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
887             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
888             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
889
890             /**************************
891              * CALCULATE INTERACTIONS *
892              **************************/
893
894             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
895
896             /* Calculate table index by multiplying r with table scale and truncate to integer */
897             rt               = _fjsp_mul_v2r8(r02,vftabscale);
898             itab_tmp         = _fjsp_dtox_v2r8(rt);
899             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
900             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
901             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
902
903             vfconv.i[0]     *= 12;
904             vfconv.i[1]     *= 12;
905
906             /* CUBIC SPLINE TABLE ELECTROSTATICS */
907             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
908             F                = _fjsp_setzero_v2r8();
909             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
910             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
911             H                = _fjsp_setzero_v2r8();
912             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
913             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
914             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
915             velec            = _fjsp_mul_v2r8(qq02,VV);
916             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
917             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
918
919             /* Update potential sum for this i atom from the interaction with this j atom. */
920             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
921             velecsum         = _fjsp_add_v2r8(velecsum,velec);
922
923             fscal            = felec;
924
925             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
926
927             /* Update vectorial force */
928             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
929             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
930             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
931             
932             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
933             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
934             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
935
936             /**************************
937              * CALCULATE INTERACTIONS *
938              **************************/
939
940             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
941
942             /* Calculate table index by multiplying r with table scale and truncate to integer */
943             rt               = _fjsp_mul_v2r8(r10,vftabscale);
944             itab_tmp         = _fjsp_dtox_v2r8(rt);
945             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
946             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
947             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
948
949             vfconv.i[0]     *= 12;
950             vfconv.i[1]     *= 12;
951
952             /* CUBIC SPLINE TABLE ELECTROSTATICS */
953             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
954             F                = _fjsp_setzero_v2r8();
955             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
956             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
957             H                = _fjsp_setzero_v2r8();
958             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
959             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
960             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
961             velec            = _fjsp_mul_v2r8(qq10,VV);
962             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
963             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
964
965             /* Update potential sum for this i atom from the interaction with this j atom. */
966             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
967             velecsum         = _fjsp_add_v2r8(velecsum,velec);
968
969             fscal            = felec;
970
971             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
972
973             /* Update vectorial force */
974             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
975             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
976             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
977             
978             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
979             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
980             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
981
982             /**************************
983              * CALCULATE INTERACTIONS *
984              **************************/
985
986             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
987
988             /* Calculate table index by multiplying r with table scale and truncate to integer */
989             rt               = _fjsp_mul_v2r8(r11,vftabscale);
990             itab_tmp         = _fjsp_dtox_v2r8(rt);
991             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
992             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
993             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
994
995             vfconv.i[0]     *= 12;
996             vfconv.i[1]     *= 12;
997
998             /* CUBIC SPLINE TABLE ELECTROSTATICS */
999             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1000             F                = _fjsp_setzero_v2r8();
1001             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1002             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1003             H                = _fjsp_setzero_v2r8();
1004             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1005             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1006             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1007             velec            = _fjsp_mul_v2r8(qq11,VV);
1008             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1009             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1010
1011             /* Update potential sum for this i atom from the interaction with this j atom. */
1012             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1013             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1014
1015             fscal            = felec;
1016
1017             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1018
1019             /* Update vectorial force */
1020             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1021             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1022             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1023             
1024             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1025             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1026             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1027
1028             /**************************
1029              * CALCULATE INTERACTIONS *
1030              **************************/
1031
1032             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1033
1034             /* Calculate table index by multiplying r with table scale and truncate to integer */
1035             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1036             itab_tmp         = _fjsp_dtox_v2r8(rt);
1037             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1038             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1039             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1040
1041             vfconv.i[0]     *= 12;
1042             vfconv.i[1]     *= 12;
1043
1044             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1045             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1046             F                = _fjsp_setzero_v2r8();
1047             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1048             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1049             H                = _fjsp_setzero_v2r8();
1050             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1051             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1052             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1053             velec            = _fjsp_mul_v2r8(qq12,VV);
1054             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1055             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1056
1057             /* Update potential sum for this i atom from the interaction with this j atom. */
1058             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1059             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1060
1061             fscal            = felec;
1062
1063             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1064
1065             /* Update vectorial force */
1066             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1067             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1068             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1069             
1070             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1071             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1072             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1073
1074             /**************************
1075              * CALCULATE INTERACTIONS *
1076              **************************/
1077
1078             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1079
1080             /* Calculate table index by multiplying r with table scale and truncate to integer */
1081             rt               = _fjsp_mul_v2r8(r20,vftabscale);
1082             itab_tmp         = _fjsp_dtox_v2r8(rt);
1083             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1084             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1085             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1086
1087             vfconv.i[0]     *= 12;
1088             vfconv.i[1]     *= 12;
1089
1090             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1091             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1092             F                = _fjsp_setzero_v2r8();
1093             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1094             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1095             H                = _fjsp_setzero_v2r8();
1096             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1097             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1098             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1099             velec            = _fjsp_mul_v2r8(qq20,VV);
1100             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1101             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1102
1103             /* Update potential sum for this i atom from the interaction with this j atom. */
1104             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1105             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1106
1107             fscal            = felec;
1108
1109             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1110
1111             /* Update vectorial force */
1112             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1113             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1114             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1115             
1116             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1117             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1118             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1119
1120             /**************************
1121              * CALCULATE INTERACTIONS *
1122              **************************/
1123
1124             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1125
1126             /* Calculate table index by multiplying r with table scale and truncate to integer */
1127             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1128             itab_tmp         = _fjsp_dtox_v2r8(rt);
1129             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1130             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1131             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1132
1133             vfconv.i[0]     *= 12;
1134             vfconv.i[1]     *= 12;
1135
1136             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1137             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1138             F                = _fjsp_setzero_v2r8();
1139             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1140             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1141             H                = _fjsp_setzero_v2r8();
1142             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1143             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1144             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1145             velec            = _fjsp_mul_v2r8(qq21,VV);
1146             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1147             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1148
1149             /* Update potential sum for this i atom from the interaction with this j atom. */
1150             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1151             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1152
1153             fscal            = felec;
1154
1155             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1156
1157             /* Update vectorial force */
1158             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1159             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1160             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1161             
1162             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1163             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1164             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1165
1166             /**************************
1167              * CALCULATE INTERACTIONS *
1168              **************************/
1169
1170             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1171
1172             /* Calculate table index by multiplying r with table scale and truncate to integer */
1173             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1174             itab_tmp         = _fjsp_dtox_v2r8(rt);
1175             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1176             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1177             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1178
1179             vfconv.i[0]     *= 12;
1180             vfconv.i[1]     *= 12;
1181
1182             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1183             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1184             F                = _fjsp_setzero_v2r8();
1185             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1186             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1187             H                = _fjsp_setzero_v2r8();
1188             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1189             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1190             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1191             velec            = _fjsp_mul_v2r8(qq22,VV);
1192             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1193             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1194
1195             /* Update potential sum for this i atom from the interaction with this j atom. */
1196             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1197             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1198
1199             fscal            = felec;
1200
1201             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1202
1203             /* Update vectorial force */
1204             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1205             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1206             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1207             
1208             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1209             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1210             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1211
1212             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1213
1214             /* Inner loop uses 444 flops */
1215         }
1216
1217         /* End of innermost loop */
1218
1219         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1220                                               f+i_coord_offset,fshift+i_shift_offset);
1221
1222         ggid                        = gid[iidx];
1223         /* Update potential energies */
1224         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1225         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1226
1227         /* Increment number of inner iterations */
1228         inneriter                  += j_index_end - j_index_start;
1229
1230         /* Outer loop uses 20 flops */
1231     }
1232
1233     /* Increment number of outer iterations */
1234     outeriter        += nri;
1235
1236     /* Update outer/inner flops */
1237
1238     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444);
1239 }
1240 /*
1241  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1242  * Electrostatics interaction: CubicSplineTable
1243  * VdW interaction:            CubicSplineTable
1244  * Geometry:                   Water3-Water3
1245  * Calculate force/pot:        Force
1246  */
1247 void
1248 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1249                     (t_nblist                    * gmx_restrict       nlist,
1250                      rvec                        * gmx_restrict          xx,
1251                      rvec                        * gmx_restrict          ff,
1252                      t_forcerec                  * gmx_restrict          fr,
1253                      t_mdatoms                   * gmx_restrict     mdatoms,
1254                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1255                      t_nrnb                      * gmx_restrict        nrnb)
1256 {
1257     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1258      * just 0 for non-waters.
1259      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1260      * jnr indices corresponding to data put in the two positions in the SIMD register.
1261      */
1262     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1263     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1264     int              jnrA,jnrB;
1265     int              j_coord_offsetA,j_coord_offsetB;
1266     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1267     real             rcutoff_scalar;
1268     real             *shiftvec,*fshift,*x,*f;
1269     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1270     int              vdwioffset0;
1271     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1272     int              vdwioffset1;
1273     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1274     int              vdwioffset2;
1275     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1276     int              vdwjidx0A,vdwjidx0B;
1277     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1278     int              vdwjidx1A,vdwjidx1B;
1279     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1280     int              vdwjidx2A,vdwjidx2B;
1281     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1282     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1283     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1284     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1285     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1286     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1287     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1288     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1289     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1290     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1291     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1292     real             *charge;
1293     int              nvdwtype;
1294     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1295     int              *vdwtype;
1296     real             *vdwparam;
1297     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1298     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1299     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1300     real             *vftab;
1301     _fjsp_v2r8       itab_tmp;
1302     _fjsp_v2r8       dummy_mask,cutoff_mask;
1303     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1304     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1305     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1306
1307     x                = xx[0];
1308     f                = ff[0];
1309
1310     nri              = nlist->nri;
1311     iinr             = nlist->iinr;
1312     jindex           = nlist->jindex;
1313     jjnr             = nlist->jjnr;
1314     shiftidx         = nlist->shift;
1315     gid              = nlist->gid;
1316     shiftvec         = fr->shift_vec[0];
1317     fshift           = fr->fshift[0];
1318     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1319     charge           = mdatoms->chargeA;
1320     nvdwtype         = fr->ntype;
1321     vdwparam         = fr->nbfp;
1322     vdwtype          = mdatoms->typeA;
1323
1324     vftab            = kernel_data->table_elec_vdw->data;
1325     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
1326
1327     /* Setup water-specific parameters */
1328     inr              = nlist->iinr[0];
1329     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1330     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1331     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1332     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1333
1334     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1335     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1336     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1337     vdwjidx0A        = 2*vdwtype[inr+0];
1338     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1339     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1340     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1341     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1342     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1343     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1344     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1345     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1346     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1347     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1348     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1349
1350     /* Avoid stupid compiler warnings */
1351     jnrA = jnrB = 0;
1352     j_coord_offsetA = 0;
1353     j_coord_offsetB = 0;
1354
1355     outeriter        = 0;
1356     inneriter        = 0;
1357
1358     /* Start outer loop over neighborlists */
1359     for(iidx=0; iidx<nri; iidx++)
1360     {
1361         /* Load shift vector for this list */
1362         i_shift_offset   = DIM*shiftidx[iidx];
1363
1364         /* Load limits for loop over neighbors */
1365         j_index_start    = jindex[iidx];
1366         j_index_end      = jindex[iidx+1];
1367
1368         /* Get outer coordinate index */
1369         inr              = iinr[iidx];
1370         i_coord_offset   = DIM*inr;
1371
1372         /* Load i particle coords and add shift vector */
1373         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1374                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1375
1376         fix0             = _fjsp_setzero_v2r8();
1377         fiy0             = _fjsp_setzero_v2r8();
1378         fiz0             = _fjsp_setzero_v2r8();
1379         fix1             = _fjsp_setzero_v2r8();
1380         fiy1             = _fjsp_setzero_v2r8();
1381         fiz1             = _fjsp_setzero_v2r8();
1382         fix2             = _fjsp_setzero_v2r8();
1383         fiy2             = _fjsp_setzero_v2r8();
1384         fiz2             = _fjsp_setzero_v2r8();
1385
1386         /* Start inner kernel loop */
1387         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1388         {
1389
1390             /* Get j neighbor index, and coordinate index */
1391             jnrA             = jjnr[jidx];
1392             jnrB             = jjnr[jidx+1];
1393             j_coord_offsetA  = DIM*jnrA;
1394             j_coord_offsetB  = DIM*jnrB;
1395
1396             /* load j atom coordinates */
1397             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1398                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1399
1400             /* Calculate displacement vector */
1401             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1402             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1403             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1404             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1405             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1406             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1407             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1408             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1409             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1410             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1411             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1412             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1413             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1414             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1415             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1416             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1417             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1418             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1419             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1420             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1421             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1422             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1423             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1424             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1425             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1426             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1427             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1428
1429             /* Calculate squared distance and things based on it */
1430             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1431             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1432             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1433             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1434             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1435             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1436             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1437             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1438             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1439
1440             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1441             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1442             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1443             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1444             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1445             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1446             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1447             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1448             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1449
1450             fjx0             = _fjsp_setzero_v2r8();
1451             fjy0             = _fjsp_setzero_v2r8();
1452             fjz0             = _fjsp_setzero_v2r8();
1453             fjx1             = _fjsp_setzero_v2r8();
1454             fjy1             = _fjsp_setzero_v2r8();
1455             fjz1             = _fjsp_setzero_v2r8();
1456             fjx2             = _fjsp_setzero_v2r8();
1457             fjy2             = _fjsp_setzero_v2r8();
1458             fjz2             = _fjsp_setzero_v2r8();
1459
1460             /**************************
1461              * CALCULATE INTERACTIONS *
1462              **************************/
1463
1464             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1465
1466             /* Calculate table index by multiplying r with table scale and truncate to integer */
1467             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1468             itab_tmp         = _fjsp_dtox_v2r8(rt);
1469             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1470             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1471             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1472
1473             vfconv.i[0]     *= 12;
1474             vfconv.i[1]     *= 12;
1475
1476             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1477             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1478             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1479             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1480             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1481             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1482             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1483             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1484             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1485             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1486
1487             /* CUBIC SPLINE TABLE DISPERSION */
1488             vfconv.i[0]       += 4;
1489             vfconv.i[1]       += 4;
1490             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1491             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1492             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1493             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1494             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1495             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1496             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1497             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1498             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1499
1500             /* CUBIC SPLINE TABLE REPULSION */
1501             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1502             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1503             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1504             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1505             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1506             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1507             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1508             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1509             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1510             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1511
1512             fscal            = _fjsp_add_v2r8(felec,fvdw);
1513
1514             /* Update vectorial force */
1515             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1516             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1517             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1518             
1519             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1520             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1521             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1522
1523             /**************************
1524              * CALCULATE INTERACTIONS *
1525              **************************/
1526
1527             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1528
1529             /* Calculate table index by multiplying r with table scale and truncate to integer */
1530             rt               = _fjsp_mul_v2r8(r01,vftabscale);
1531             itab_tmp         = _fjsp_dtox_v2r8(rt);
1532             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1533             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1534             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1535
1536             vfconv.i[0]     *= 12;
1537             vfconv.i[1]     *= 12;
1538
1539             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1540             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1541             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1542             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1543             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1544             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1545             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1546             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1547             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1548             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1549
1550             fscal            = felec;
1551
1552             /* Update vectorial force */
1553             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1554             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1555             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1556             
1557             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1558             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1559             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1560
1561             /**************************
1562              * CALCULATE INTERACTIONS *
1563              **************************/
1564
1565             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1566
1567             /* Calculate table index by multiplying r with table scale and truncate to integer */
1568             rt               = _fjsp_mul_v2r8(r02,vftabscale);
1569             itab_tmp         = _fjsp_dtox_v2r8(rt);
1570             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1571             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1572             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1573
1574             vfconv.i[0]     *= 12;
1575             vfconv.i[1]     *= 12;
1576
1577             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1578             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1579             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1580             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1581             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1582             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1583             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1584             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1585             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1586             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1587
1588             fscal            = felec;
1589
1590             /* Update vectorial force */
1591             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1592             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1593             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1594             
1595             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1596             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1597             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1598
1599             /**************************
1600              * CALCULATE INTERACTIONS *
1601              **************************/
1602
1603             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1604
1605             /* Calculate table index by multiplying r with table scale and truncate to integer */
1606             rt               = _fjsp_mul_v2r8(r10,vftabscale);
1607             itab_tmp         = _fjsp_dtox_v2r8(rt);
1608             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1609             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1610             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1611
1612             vfconv.i[0]     *= 12;
1613             vfconv.i[1]     *= 12;
1614
1615             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1616             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1617             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1618             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1619             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1620             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1621             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1622             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1623             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1624             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1625
1626             fscal            = felec;
1627
1628             /* Update vectorial force */
1629             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1630             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1631             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1632             
1633             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1634             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1635             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1636
1637             /**************************
1638              * CALCULATE INTERACTIONS *
1639              **************************/
1640
1641             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1642
1643             /* Calculate table index by multiplying r with table scale and truncate to integer */
1644             rt               = _fjsp_mul_v2r8(r11,vftabscale);
1645             itab_tmp         = _fjsp_dtox_v2r8(rt);
1646             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1647             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1648             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1649
1650             vfconv.i[0]     *= 12;
1651             vfconv.i[1]     *= 12;
1652
1653             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1654             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1655             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1656             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1657             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1658             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1659             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1660             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1661             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1662             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1663
1664             fscal            = felec;
1665
1666             /* Update vectorial force */
1667             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1668             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1669             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1670             
1671             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1672             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1673             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1674
1675             /**************************
1676              * CALCULATE INTERACTIONS *
1677              **************************/
1678
1679             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1680
1681             /* Calculate table index by multiplying r with table scale and truncate to integer */
1682             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1683             itab_tmp         = _fjsp_dtox_v2r8(rt);
1684             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1685             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1686             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1687
1688             vfconv.i[0]     *= 12;
1689             vfconv.i[1]     *= 12;
1690
1691             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1692             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1693             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1694             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1695             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1696             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1697             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1698             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1699             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1700             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1701
1702             fscal            = felec;
1703
1704             /* Update vectorial force */
1705             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1706             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1707             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1708             
1709             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1710             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1711             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1712
1713             /**************************
1714              * CALCULATE INTERACTIONS *
1715              **************************/
1716
1717             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1718
1719             /* Calculate table index by multiplying r with table scale and truncate to integer */
1720             rt               = _fjsp_mul_v2r8(r20,vftabscale);
1721             itab_tmp         = _fjsp_dtox_v2r8(rt);
1722             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1723             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1724             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1725
1726             vfconv.i[0]     *= 12;
1727             vfconv.i[1]     *= 12;
1728
1729             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1730             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1731             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1732             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1733             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1734             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1735             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1736             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1737             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1738             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1739
1740             fscal            = felec;
1741
1742             /* Update vectorial force */
1743             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1744             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1745             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1746             
1747             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1748             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1749             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1750
1751             /**************************
1752              * CALCULATE INTERACTIONS *
1753              **************************/
1754
1755             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1756
1757             /* Calculate table index by multiplying r with table scale and truncate to integer */
1758             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1759             itab_tmp         = _fjsp_dtox_v2r8(rt);
1760             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1761             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1762             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1763
1764             vfconv.i[0]     *= 12;
1765             vfconv.i[1]     *= 12;
1766
1767             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1768             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1769             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1770             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1771             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1772             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1773             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1774             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1775             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1776             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1777
1778             fscal            = felec;
1779
1780             /* Update vectorial force */
1781             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1782             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1783             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1784             
1785             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1786             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1787             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1788
1789             /**************************
1790              * CALCULATE INTERACTIONS *
1791              **************************/
1792
1793             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1794
1795             /* Calculate table index by multiplying r with table scale and truncate to integer */
1796             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1797             itab_tmp         = _fjsp_dtox_v2r8(rt);
1798             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1799             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1800             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1801
1802             vfconv.i[0]     *= 12;
1803             vfconv.i[1]     *= 12;
1804
1805             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1806             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1807             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1808             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1809             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1810             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1811             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1812             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1813             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1814             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1815
1816             fscal            = felec;
1817
1818             /* Update vectorial force */
1819             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1820             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1821             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1822             
1823             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1824             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1825             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1826
1827             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1828
1829             /* Inner loop uses 400 flops */
1830         }
1831
1832         if(jidx<j_index_end)
1833         {
1834
1835             jnrA             = jjnr[jidx];
1836             j_coord_offsetA  = DIM*jnrA;
1837
1838             /* load j atom coordinates */
1839             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1840                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1841
1842             /* Calculate displacement vector */
1843             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1844             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1845             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1846             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1847             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1848             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1849             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1850             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1851             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1852             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1853             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1854             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1855             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1856             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1857             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1858             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1859             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1860             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1861             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1862             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1863             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1864             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1865             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1866             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1867             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1868             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1869             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1870
1871             /* Calculate squared distance and things based on it */
1872             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1873             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1874             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1875             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1876             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1877             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1878             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1879             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1880             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1881
1882             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1883             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1884             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1885             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1886             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1887             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1888             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1889             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1890             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1891
1892             fjx0             = _fjsp_setzero_v2r8();
1893             fjy0             = _fjsp_setzero_v2r8();
1894             fjz0             = _fjsp_setzero_v2r8();
1895             fjx1             = _fjsp_setzero_v2r8();
1896             fjy1             = _fjsp_setzero_v2r8();
1897             fjz1             = _fjsp_setzero_v2r8();
1898             fjx2             = _fjsp_setzero_v2r8();
1899             fjy2             = _fjsp_setzero_v2r8();
1900             fjz2             = _fjsp_setzero_v2r8();
1901
1902             /**************************
1903              * CALCULATE INTERACTIONS *
1904              **************************/
1905
1906             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1907
1908             /* Calculate table index by multiplying r with table scale and truncate to integer */
1909             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1910             itab_tmp         = _fjsp_dtox_v2r8(rt);
1911             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1912             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1913             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1914
1915             vfconv.i[0]     *= 12;
1916             vfconv.i[1]     *= 12;
1917
1918             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1919             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1920             F                = _fjsp_setzero_v2r8();
1921             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1922             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1923             H                = _fjsp_setzero_v2r8();
1924             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1925             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1926             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1927             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1928
1929             /* CUBIC SPLINE TABLE DISPERSION */
1930             vfconv.i[0]       += 4;
1931             vfconv.i[1]       += 4;
1932             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1933             F                = _fjsp_setzero_v2r8();
1934             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1935             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1936             H                = _fjsp_setzero_v2r8();
1937             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1938             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1939             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1940             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1941
1942             /* CUBIC SPLINE TABLE REPULSION */
1943             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1944             F                = _fjsp_setzero_v2r8();
1945             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1946             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1947             H                = _fjsp_setzero_v2r8();
1948             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1949             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1950             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1951             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1952             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1953
1954             fscal            = _fjsp_add_v2r8(felec,fvdw);
1955
1956             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1957
1958             /* Update vectorial force */
1959             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1960             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1961             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1962             
1963             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1964             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1965             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1966
1967             /**************************
1968              * CALCULATE INTERACTIONS *
1969              **************************/
1970
1971             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1972
1973             /* Calculate table index by multiplying r with table scale and truncate to integer */
1974             rt               = _fjsp_mul_v2r8(r01,vftabscale);
1975             itab_tmp         = _fjsp_dtox_v2r8(rt);
1976             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1977             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1978             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1979
1980             vfconv.i[0]     *= 12;
1981             vfconv.i[1]     *= 12;
1982
1983             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1984             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1985             F                = _fjsp_setzero_v2r8();
1986             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1987             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1988             H                = _fjsp_setzero_v2r8();
1989             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1990             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1991             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1992             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1993
1994             fscal            = felec;
1995
1996             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1997
1998             /* Update vectorial force */
1999             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
2000             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2001             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2002             
2003             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2004             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2005             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2006
2007             /**************************
2008              * CALCULATE INTERACTIONS *
2009              **************************/
2010
2011             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
2012
2013             /* Calculate table index by multiplying r with table scale and truncate to integer */
2014             rt               = _fjsp_mul_v2r8(r02,vftabscale);
2015             itab_tmp         = _fjsp_dtox_v2r8(rt);
2016             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2017             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2018             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2019
2020             vfconv.i[0]     *= 12;
2021             vfconv.i[1]     *= 12;
2022
2023             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2024             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2025             F                = _fjsp_setzero_v2r8();
2026             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2027             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2028             H                = _fjsp_setzero_v2r8();
2029             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2030             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2031             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2032             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
2033
2034             fscal            = felec;
2035
2036             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2037
2038             /* Update vectorial force */
2039             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
2040             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2041             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2042             
2043             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2044             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2045             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2046
2047             /**************************
2048              * CALCULATE INTERACTIONS *
2049              **************************/
2050
2051             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
2052
2053             /* Calculate table index by multiplying r with table scale and truncate to integer */
2054             rt               = _fjsp_mul_v2r8(r10,vftabscale);
2055             itab_tmp         = _fjsp_dtox_v2r8(rt);
2056             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2057             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2058             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2059
2060             vfconv.i[0]     *= 12;
2061             vfconv.i[1]     *= 12;
2062
2063             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2064             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2065             F                = _fjsp_setzero_v2r8();
2066             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2067             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2068             H                = _fjsp_setzero_v2r8();
2069             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2070             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2071             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2072             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
2073
2074             fscal            = felec;
2075
2076             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2077
2078             /* Update vectorial force */
2079             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
2080             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2081             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2082             
2083             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2084             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2085             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2086
2087             /**************************
2088              * CALCULATE INTERACTIONS *
2089              **************************/
2090
2091             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
2092
2093             /* Calculate table index by multiplying r with table scale and truncate to integer */
2094             rt               = _fjsp_mul_v2r8(r11,vftabscale);
2095             itab_tmp         = _fjsp_dtox_v2r8(rt);
2096             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2097             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2098             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2099
2100             vfconv.i[0]     *= 12;
2101             vfconv.i[1]     *= 12;
2102
2103             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2104             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2105             F                = _fjsp_setzero_v2r8();
2106             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2107             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2108             H                = _fjsp_setzero_v2r8();
2109             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2110             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2111             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2112             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2113
2114             fscal            = felec;
2115
2116             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2117
2118             /* Update vectorial force */
2119             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
2120             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2121             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2122             
2123             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2124             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2125             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2126
2127             /**************************
2128              * CALCULATE INTERACTIONS *
2129              **************************/
2130
2131             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2132
2133             /* Calculate table index by multiplying r with table scale and truncate to integer */
2134             rt               = _fjsp_mul_v2r8(r12,vftabscale);
2135             itab_tmp         = _fjsp_dtox_v2r8(rt);
2136             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2137             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2138             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2139
2140             vfconv.i[0]     *= 12;
2141             vfconv.i[1]     *= 12;
2142
2143             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2144             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2145             F                = _fjsp_setzero_v2r8();
2146             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2147             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2148             H                = _fjsp_setzero_v2r8();
2149             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2150             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2151             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2152             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2153
2154             fscal            = felec;
2155
2156             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2157
2158             /* Update vectorial force */
2159             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2160             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2161             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2162             
2163             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2164             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2165             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2166
2167             /**************************
2168              * CALCULATE INTERACTIONS *
2169              **************************/
2170
2171             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
2172
2173             /* Calculate table index by multiplying r with table scale and truncate to integer */
2174             rt               = _fjsp_mul_v2r8(r20,vftabscale);
2175             itab_tmp         = _fjsp_dtox_v2r8(rt);
2176             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2177             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2178             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2179
2180             vfconv.i[0]     *= 12;
2181             vfconv.i[1]     *= 12;
2182
2183             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2184             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2185             F                = _fjsp_setzero_v2r8();
2186             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2187             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2188             H                = _fjsp_setzero_v2r8();
2189             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2190             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2191             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2192             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2193
2194             fscal            = felec;
2195
2196             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2197
2198             /* Update vectorial force */
2199             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
2200             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2201             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2202             
2203             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2204             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2205             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2206
2207             /**************************
2208              * CALCULATE INTERACTIONS *
2209              **************************/
2210
2211             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2212
2213             /* Calculate table index by multiplying r with table scale and truncate to integer */
2214             rt               = _fjsp_mul_v2r8(r21,vftabscale);
2215             itab_tmp         = _fjsp_dtox_v2r8(rt);
2216             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2217             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2218             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2219
2220             vfconv.i[0]     *= 12;
2221             vfconv.i[1]     *= 12;
2222
2223             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2224             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2225             F                = _fjsp_setzero_v2r8();
2226             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2227             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2228             H                = _fjsp_setzero_v2r8();
2229             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2230             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2231             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2232             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2233
2234             fscal            = felec;
2235
2236             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2237
2238             /* Update vectorial force */
2239             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2240             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2241             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2242             
2243             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2244             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2245             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2246
2247             /**************************
2248              * CALCULATE INTERACTIONS *
2249              **************************/
2250
2251             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2252
2253             /* Calculate table index by multiplying r with table scale and truncate to integer */
2254             rt               = _fjsp_mul_v2r8(r22,vftabscale);
2255             itab_tmp         = _fjsp_dtox_v2r8(rt);
2256             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2257             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2258             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2259
2260             vfconv.i[0]     *= 12;
2261             vfconv.i[1]     *= 12;
2262
2263             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2264             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2265             F                = _fjsp_setzero_v2r8();
2266             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2267             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2268             H                = _fjsp_setzero_v2r8();
2269             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2270             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2271             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2272             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2273
2274             fscal            = felec;
2275
2276             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2277
2278             /* Update vectorial force */
2279             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2280             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2281             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2282             
2283             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2284             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2285             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2286
2287             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2288
2289             /* Inner loop uses 400 flops */
2290         }
2291
2292         /* End of innermost loop */
2293
2294         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2295                                               f+i_coord_offset,fshift+i_shift_offset);
2296
2297         /* Increment number of inner iterations */
2298         inneriter                  += j_index_end - j_index_start;
2299
2300         /* Outer loop uses 18 flops */
2301     }
2302
2303     /* Increment number of outer iterations */
2304     outeriter        += nri;
2305
2306     /* Update outer/inner flops */
2307
2308     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);
2309 }