42e2b4b00e0d0012e0afc758ce4233d50cb4c1d7
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: ReactionField
52  * VdW interaction:            CubicSplineTable
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     int              nvdwtype;
103     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
104     int              *vdwtype;
105     real             *vdwparam;
106     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
107     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
109     real             *vftab;
110     _fjsp_v2r8       itab_tmp;
111     _fjsp_v2r8       dummy_mask,cutoff_mask;
112     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
113     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
114     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115
116     x                = xx[0];
117     f                = ff[0];
118
119     nri              = nlist->nri;
120     iinr             = nlist->iinr;
121     jindex           = nlist->jindex;
122     jjnr             = nlist->jjnr;
123     shiftidx         = nlist->shift;
124     gid              = nlist->gid;
125     shiftvec         = fr->shift_vec[0];
126     fshift           = fr->fshift[0];
127     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
128     charge           = mdatoms->chargeA;
129     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
130     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
131     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
132     nvdwtype         = fr->ntype;
133     vdwparam         = fr->nbfp;
134     vdwtype          = mdatoms->typeA;
135
136     vftab            = kernel_data->table_vdw->data;
137     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
138
139     /* Setup water-specific parameters */
140     inr              = nlist->iinr[0];
141     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
142     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
143     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
144     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
145
146     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
147     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
148     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
149     vdwjidx0A        = 2*vdwtype[inr+0];
150     qq00             = _fjsp_mul_v2r8(iq0,jq0);
151     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
152     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
153     qq01             = _fjsp_mul_v2r8(iq0,jq1);
154     qq02             = _fjsp_mul_v2r8(iq0,jq2);
155     qq10             = _fjsp_mul_v2r8(iq1,jq0);
156     qq11             = _fjsp_mul_v2r8(iq1,jq1);
157     qq12             = _fjsp_mul_v2r8(iq1,jq2);
158     qq20             = _fjsp_mul_v2r8(iq2,jq0);
159     qq21             = _fjsp_mul_v2r8(iq2,jq1);
160     qq22             = _fjsp_mul_v2r8(iq2,jq2);
161
162     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
163     rcutoff_scalar   = fr->rcoulomb;
164     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
165     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
166
167     /* Avoid stupid compiler warnings */
168     jnrA = jnrB = 0;
169     j_coord_offsetA = 0;
170     j_coord_offsetB = 0;
171
172     outeriter        = 0;
173     inneriter        = 0;
174
175     /* Start outer loop over neighborlists */
176     for(iidx=0; iidx<nri; iidx++)
177     {
178         /* Load shift vector for this list */
179         i_shift_offset   = DIM*shiftidx[iidx];
180
181         /* Load limits for loop over neighbors */
182         j_index_start    = jindex[iidx];
183         j_index_end      = jindex[iidx+1];
184
185         /* Get outer coordinate index */
186         inr              = iinr[iidx];
187         i_coord_offset   = DIM*inr;
188
189         /* Load i particle coords and add shift vector */
190         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
191                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
192
193         fix0             = _fjsp_setzero_v2r8();
194         fiy0             = _fjsp_setzero_v2r8();
195         fiz0             = _fjsp_setzero_v2r8();
196         fix1             = _fjsp_setzero_v2r8();
197         fiy1             = _fjsp_setzero_v2r8();
198         fiz1             = _fjsp_setzero_v2r8();
199         fix2             = _fjsp_setzero_v2r8();
200         fiy2             = _fjsp_setzero_v2r8();
201         fiz2             = _fjsp_setzero_v2r8();
202
203         /* Reset potential sums */
204         velecsum         = _fjsp_setzero_v2r8();
205         vvdwsum          = _fjsp_setzero_v2r8();
206
207         /* Start inner kernel loop */
208         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
209         {
210
211             /* Get j neighbor index, and coordinate index */
212             jnrA             = jjnr[jidx];
213             jnrB             = jjnr[jidx+1];
214             j_coord_offsetA  = DIM*jnrA;
215             j_coord_offsetB  = DIM*jnrB;
216
217             /* load j atom coordinates */
218             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
219                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
220
221             /* Calculate displacement vector */
222             dx00             = _fjsp_sub_v2r8(ix0,jx0);
223             dy00             = _fjsp_sub_v2r8(iy0,jy0);
224             dz00             = _fjsp_sub_v2r8(iz0,jz0);
225             dx01             = _fjsp_sub_v2r8(ix0,jx1);
226             dy01             = _fjsp_sub_v2r8(iy0,jy1);
227             dz01             = _fjsp_sub_v2r8(iz0,jz1);
228             dx02             = _fjsp_sub_v2r8(ix0,jx2);
229             dy02             = _fjsp_sub_v2r8(iy0,jy2);
230             dz02             = _fjsp_sub_v2r8(iz0,jz2);
231             dx10             = _fjsp_sub_v2r8(ix1,jx0);
232             dy10             = _fjsp_sub_v2r8(iy1,jy0);
233             dz10             = _fjsp_sub_v2r8(iz1,jz0);
234             dx11             = _fjsp_sub_v2r8(ix1,jx1);
235             dy11             = _fjsp_sub_v2r8(iy1,jy1);
236             dz11             = _fjsp_sub_v2r8(iz1,jz1);
237             dx12             = _fjsp_sub_v2r8(ix1,jx2);
238             dy12             = _fjsp_sub_v2r8(iy1,jy2);
239             dz12             = _fjsp_sub_v2r8(iz1,jz2);
240             dx20             = _fjsp_sub_v2r8(ix2,jx0);
241             dy20             = _fjsp_sub_v2r8(iy2,jy0);
242             dz20             = _fjsp_sub_v2r8(iz2,jz0);
243             dx21             = _fjsp_sub_v2r8(ix2,jx1);
244             dy21             = _fjsp_sub_v2r8(iy2,jy1);
245             dz21             = _fjsp_sub_v2r8(iz2,jz1);
246             dx22             = _fjsp_sub_v2r8(ix2,jx2);
247             dy22             = _fjsp_sub_v2r8(iy2,jy2);
248             dz22             = _fjsp_sub_v2r8(iz2,jz2);
249
250             /* Calculate squared distance and things based on it */
251             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
252             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
253             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
254             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
255             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
256             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
257             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
258             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
259             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
260
261             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
262             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
263             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
264             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
265             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
266             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
267             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
268             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
269             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
270
271             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
272             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
273             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
274             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
275             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
276             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
277             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
278             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
279             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
280
281             fjx0             = _fjsp_setzero_v2r8();
282             fjy0             = _fjsp_setzero_v2r8();
283             fjz0             = _fjsp_setzero_v2r8();
284             fjx1             = _fjsp_setzero_v2r8();
285             fjy1             = _fjsp_setzero_v2r8();
286             fjz1             = _fjsp_setzero_v2r8();
287             fjx2             = _fjsp_setzero_v2r8();
288             fjy2             = _fjsp_setzero_v2r8();
289             fjz2             = _fjsp_setzero_v2r8();
290
291             /**************************
292              * CALCULATE INTERACTIONS *
293              **************************/
294
295             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
296             {
297
298             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
299
300             /* Calculate table index by multiplying r with table scale and truncate to integer */
301             rt               = _fjsp_mul_v2r8(r00,vftabscale);
302             itab_tmp         = _fjsp_dtox_v2r8(rt);
303             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
304             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
305             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
306
307             vfconv.i[0]     *= 8;
308             vfconv.i[1]     *= 8;
309
310             /* REACTION-FIELD ELECTROSTATICS */
311             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
312             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
313
314             /* CUBIC SPLINE TABLE DISPERSION */
315             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
316             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
317             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
318             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
319             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
320             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
321             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
322             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
323             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
324             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
325             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
326
327             /* CUBIC SPLINE TABLE REPULSION */
328             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
329             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
330             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
331             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
332             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
333             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
334             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
335             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
336             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
337             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
338             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
339             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
340             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
341
342             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
343
344             /* Update potential sum for this i atom from the interaction with this j atom. */
345             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
346             velecsum         = _fjsp_add_v2r8(velecsum,velec);
347             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
348             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
349
350             fscal            = _fjsp_add_v2r8(felec,fvdw);
351
352             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
353
354             /* Update vectorial force */
355             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
356             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
357             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
358             
359             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
360             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
361             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
362
363             }
364
365             /**************************
366              * CALCULATE INTERACTIONS *
367              **************************/
368
369             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
370             {
371
372             /* REACTION-FIELD ELECTROSTATICS */
373             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
374             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
375
376             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
377
378             /* Update potential sum for this i atom from the interaction with this j atom. */
379             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
380             velecsum         = _fjsp_add_v2r8(velecsum,velec);
381
382             fscal            = felec;
383
384             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
385
386             /* Update vectorial force */
387             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
388             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
389             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
390             
391             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
392             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
393             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
394
395             }
396
397             /**************************
398              * CALCULATE INTERACTIONS *
399              **************************/
400
401             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
402             {
403
404             /* REACTION-FIELD ELECTROSTATICS */
405             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
406             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
407
408             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
409
410             /* Update potential sum for this i atom from the interaction with this j atom. */
411             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
412             velecsum         = _fjsp_add_v2r8(velecsum,velec);
413
414             fscal            = felec;
415
416             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
417
418             /* Update vectorial force */
419             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
420             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
421             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
422             
423             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
424             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
425             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
426
427             }
428
429             /**************************
430              * CALCULATE INTERACTIONS *
431              **************************/
432
433             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
434             {
435
436             /* REACTION-FIELD ELECTROSTATICS */
437             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
438             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
439
440             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
441
442             /* Update potential sum for this i atom from the interaction with this j atom. */
443             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
444             velecsum         = _fjsp_add_v2r8(velecsum,velec);
445
446             fscal            = felec;
447
448             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
449
450             /* Update vectorial force */
451             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
452             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
453             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
454             
455             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
456             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
457             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
458
459             }
460
461             /**************************
462              * CALCULATE INTERACTIONS *
463              **************************/
464
465             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
466             {
467
468             /* REACTION-FIELD ELECTROSTATICS */
469             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
470             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
471
472             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
473
474             /* Update potential sum for this i atom from the interaction with this j atom. */
475             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
476             velecsum         = _fjsp_add_v2r8(velecsum,velec);
477
478             fscal            = felec;
479
480             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
481
482             /* Update vectorial force */
483             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
484             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
485             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
486             
487             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
488             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
489             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
490
491             }
492
493             /**************************
494              * CALCULATE INTERACTIONS *
495              **************************/
496
497             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
498             {
499
500             /* REACTION-FIELD ELECTROSTATICS */
501             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
502             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
503
504             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
505
506             /* Update potential sum for this i atom from the interaction with this j atom. */
507             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
508             velecsum         = _fjsp_add_v2r8(velecsum,velec);
509
510             fscal            = felec;
511
512             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
513
514             /* Update vectorial force */
515             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
516             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
517             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
518             
519             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
520             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
521             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
522
523             }
524
525             /**************************
526              * CALCULATE INTERACTIONS *
527              **************************/
528
529             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
530             {
531
532             /* REACTION-FIELD ELECTROSTATICS */
533             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
534             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
535
536             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
537
538             /* Update potential sum for this i atom from the interaction with this j atom. */
539             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
540             velecsum         = _fjsp_add_v2r8(velecsum,velec);
541
542             fscal            = felec;
543
544             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
545
546             /* Update vectorial force */
547             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
548             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
549             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
550             
551             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
552             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
553             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
554
555             }
556
557             /**************************
558              * CALCULATE INTERACTIONS *
559              **************************/
560
561             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
562             {
563
564             /* REACTION-FIELD ELECTROSTATICS */
565             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
566             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
567
568             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
569
570             /* Update potential sum for this i atom from the interaction with this j atom. */
571             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
572             velecsum         = _fjsp_add_v2r8(velecsum,velec);
573
574             fscal            = felec;
575
576             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
577
578             /* Update vectorial force */
579             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
580             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
581             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
582             
583             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
584             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
585             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
586
587             }
588
589             /**************************
590              * CALCULATE INTERACTIONS *
591              **************************/
592
593             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
594             {
595
596             /* REACTION-FIELD ELECTROSTATICS */
597             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
598             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
599
600             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
601
602             /* Update potential sum for this i atom from the interaction with this j atom. */
603             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
604             velecsum         = _fjsp_add_v2r8(velecsum,velec);
605
606             fscal            = felec;
607
608             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
609
610             /* Update vectorial force */
611             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
612             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
613             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
614             
615             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
616             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
617             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
618
619             }
620
621             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
622
623             /* Inner loop uses 387 flops */
624         }
625
626         if(jidx<j_index_end)
627         {
628
629             jnrA             = jjnr[jidx];
630             j_coord_offsetA  = DIM*jnrA;
631
632             /* load j atom coordinates */
633             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
634                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
635
636             /* Calculate displacement vector */
637             dx00             = _fjsp_sub_v2r8(ix0,jx0);
638             dy00             = _fjsp_sub_v2r8(iy0,jy0);
639             dz00             = _fjsp_sub_v2r8(iz0,jz0);
640             dx01             = _fjsp_sub_v2r8(ix0,jx1);
641             dy01             = _fjsp_sub_v2r8(iy0,jy1);
642             dz01             = _fjsp_sub_v2r8(iz0,jz1);
643             dx02             = _fjsp_sub_v2r8(ix0,jx2);
644             dy02             = _fjsp_sub_v2r8(iy0,jy2);
645             dz02             = _fjsp_sub_v2r8(iz0,jz2);
646             dx10             = _fjsp_sub_v2r8(ix1,jx0);
647             dy10             = _fjsp_sub_v2r8(iy1,jy0);
648             dz10             = _fjsp_sub_v2r8(iz1,jz0);
649             dx11             = _fjsp_sub_v2r8(ix1,jx1);
650             dy11             = _fjsp_sub_v2r8(iy1,jy1);
651             dz11             = _fjsp_sub_v2r8(iz1,jz1);
652             dx12             = _fjsp_sub_v2r8(ix1,jx2);
653             dy12             = _fjsp_sub_v2r8(iy1,jy2);
654             dz12             = _fjsp_sub_v2r8(iz1,jz2);
655             dx20             = _fjsp_sub_v2r8(ix2,jx0);
656             dy20             = _fjsp_sub_v2r8(iy2,jy0);
657             dz20             = _fjsp_sub_v2r8(iz2,jz0);
658             dx21             = _fjsp_sub_v2r8(ix2,jx1);
659             dy21             = _fjsp_sub_v2r8(iy2,jy1);
660             dz21             = _fjsp_sub_v2r8(iz2,jz1);
661             dx22             = _fjsp_sub_v2r8(ix2,jx2);
662             dy22             = _fjsp_sub_v2r8(iy2,jy2);
663             dz22             = _fjsp_sub_v2r8(iz2,jz2);
664
665             /* Calculate squared distance and things based on it */
666             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
667             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
668             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
669             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
670             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
671             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
672             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
673             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
674             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
675
676             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
677             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
678             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
679             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
680             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
681             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
682             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
683             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
684             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
685
686             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
687             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
688             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
689             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
690             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
691             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
692             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
693             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
694             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
695
696             fjx0             = _fjsp_setzero_v2r8();
697             fjy0             = _fjsp_setzero_v2r8();
698             fjz0             = _fjsp_setzero_v2r8();
699             fjx1             = _fjsp_setzero_v2r8();
700             fjy1             = _fjsp_setzero_v2r8();
701             fjz1             = _fjsp_setzero_v2r8();
702             fjx2             = _fjsp_setzero_v2r8();
703             fjy2             = _fjsp_setzero_v2r8();
704             fjz2             = _fjsp_setzero_v2r8();
705
706             /**************************
707              * CALCULATE INTERACTIONS *
708              **************************/
709
710             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
711             {
712
713             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
714
715             /* Calculate table index by multiplying r with table scale and truncate to integer */
716             rt               = _fjsp_mul_v2r8(r00,vftabscale);
717             itab_tmp         = _fjsp_dtox_v2r8(rt);
718             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
719             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
720             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
721
722             vfconv.i[0]     *= 8;
723             vfconv.i[1]     *= 8;
724
725             /* REACTION-FIELD ELECTROSTATICS */
726             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
727             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
728
729             /* CUBIC SPLINE TABLE DISPERSION */
730             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
731             F                = _fjsp_setzero_v2r8();
732             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
733             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
734             H                = _fjsp_setzero_v2r8();
735             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
736             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
737             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
738             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
739             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
740             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
741
742             /* CUBIC SPLINE TABLE REPULSION */
743             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
744             F                = _fjsp_setzero_v2r8();
745             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
746             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
747             H                = _fjsp_setzero_v2r8();
748             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
749             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
750             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
751             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
752             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
753             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
754             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
755             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
756
757             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
758
759             /* Update potential sum for this i atom from the interaction with this j atom. */
760             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
761             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
762             velecsum         = _fjsp_add_v2r8(velecsum,velec);
763             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
764             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
765             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
766
767             fscal            = _fjsp_add_v2r8(felec,fvdw);
768
769             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
770
771             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
772
773             /* Update vectorial force */
774             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
775             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
776             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
777             
778             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
779             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
780             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
781
782             }
783
784             /**************************
785              * CALCULATE INTERACTIONS *
786              **************************/
787
788             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
789             {
790
791             /* REACTION-FIELD ELECTROSTATICS */
792             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
793             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
794
795             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
796
797             /* Update potential sum for this i atom from the interaction with this j atom. */
798             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
799             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
800             velecsum         = _fjsp_add_v2r8(velecsum,velec);
801
802             fscal            = felec;
803
804             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
805
806             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
807
808             /* Update vectorial force */
809             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
810             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
811             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
812             
813             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
814             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
815             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
816
817             }
818
819             /**************************
820              * CALCULATE INTERACTIONS *
821              **************************/
822
823             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
824             {
825
826             /* REACTION-FIELD ELECTROSTATICS */
827             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
828             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
829
830             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
831
832             /* Update potential sum for this i atom from the interaction with this j atom. */
833             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
834             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
835             velecsum         = _fjsp_add_v2r8(velecsum,velec);
836
837             fscal            = felec;
838
839             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
840
841             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
842
843             /* Update vectorial force */
844             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
845             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
846             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
847             
848             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
849             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
850             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
851
852             }
853
854             /**************************
855              * CALCULATE INTERACTIONS *
856              **************************/
857
858             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
859             {
860
861             /* REACTION-FIELD ELECTROSTATICS */
862             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
863             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
864
865             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
866
867             /* Update potential sum for this i atom from the interaction with this j atom. */
868             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
869             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
870             velecsum         = _fjsp_add_v2r8(velecsum,velec);
871
872             fscal            = felec;
873
874             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
875
876             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
877
878             /* Update vectorial force */
879             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
880             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
881             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
882             
883             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
884             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
885             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
886
887             }
888
889             /**************************
890              * CALCULATE INTERACTIONS *
891              **************************/
892
893             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
894             {
895
896             /* REACTION-FIELD ELECTROSTATICS */
897             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
898             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
899
900             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
901
902             /* Update potential sum for this i atom from the interaction with this j atom. */
903             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
904             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
905             velecsum         = _fjsp_add_v2r8(velecsum,velec);
906
907             fscal            = felec;
908
909             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
910
911             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
912
913             /* Update vectorial force */
914             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
915             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
916             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
917             
918             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
919             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
920             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
921
922             }
923
924             /**************************
925              * CALCULATE INTERACTIONS *
926              **************************/
927
928             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
929             {
930
931             /* REACTION-FIELD ELECTROSTATICS */
932             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
933             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
934
935             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
936
937             /* Update potential sum for this i atom from the interaction with this j atom. */
938             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
939             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
940             velecsum         = _fjsp_add_v2r8(velecsum,velec);
941
942             fscal            = felec;
943
944             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
945
946             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
947
948             /* Update vectorial force */
949             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
950             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
951             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
952             
953             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
954             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
955             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
956
957             }
958
959             /**************************
960              * CALCULATE INTERACTIONS *
961              **************************/
962
963             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
964             {
965
966             /* REACTION-FIELD ELECTROSTATICS */
967             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
968             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
969
970             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
971
972             /* Update potential sum for this i atom from the interaction with this j atom. */
973             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
974             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
975             velecsum         = _fjsp_add_v2r8(velecsum,velec);
976
977             fscal            = felec;
978
979             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
980
981             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
982
983             /* Update vectorial force */
984             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
985             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
986             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
987             
988             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
989             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
990             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
991
992             }
993
994             /**************************
995              * CALCULATE INTERACTIONS *
996              **************************/
997
998             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
999             {
1000
1001             /* REACTION-FIELD ELECTROSTATICS */
1002             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
1003             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1004
1005             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1006
1007             /* Update potential sum for this i atom from the interaction with this j atom. */
1008             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1009             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1010             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1011
1012             fscal            = felec;
1013
1014             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1015
1016             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1017
1018             /* Update vectorial force */
1019             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1020             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1021             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1022             
1023             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1024             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1025             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1026
1027             }
1028
1029             /**************************
1030              * CALCULATE INTERACTIONS *
1031              **************************/
1032
1033             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1034             {
1035
1036             /* REACTION-FIELD ELECTROSTATICS */
1037             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
1038             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1039
1040             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1041
1042             /* Update potential sum for this i atom from the interaction with this j atom. */
1043             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1044             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1045             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1046
1047             fscal            = felec;
1048
1049             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1050
1051             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1052
1053             /* Update vectorial force */
1054             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1055             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1056             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1057             
1058             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1059             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1060             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1061
1062             }
1063
1064             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1065
1066             /* Inner loop uses 387 flops */
1067         }
1068
1069         /* End of innermost loop */
1070
1071         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1072                                               f+i_coord_offset,fshift+i_shift_offset);
1073
1074         ggid                        = gid[iidx];
1075         /* Update potential energies */
1076         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1077         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1078
1079         /* Increment number of inner iterations */
1080         inneriter                  += j_index_end - j_index_start;
1081
1082         /* Outer loop uses 20 flops */
1083     }
1084
1085     /* Increment number of outer iterations */
1086     outeriter        += nri;
1087
1088     /* Update outer/inner flops */
1089
1090     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387);
1091 }
1092 /*
1093  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1094  * Electrostatics interaction: ReactionField
1095  * VdW interaction:            CubicSplineTable
1096  * Geometry:                   Water3-Water3
1097  * Calculate force/pot:        Force
1098  */
1099 void
1100 nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1101                     (t_nblist                    * gmx_restrict       nlist,
1102                      rvec                        * gmx_restrict          xx,
1103                      rvec                        * gmx_restrict          ff,
1104                      t_forcerec                  * gmx_restrict          fr,
1105                      t_mdatoms                   * gmx_restrict     mdatoms,
1106                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1107                      t_nrnb                      * gmx_restrict        nrnb)
1108 {
1109     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1110      * just 0 for non-waters.
1111      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
1112      * jnr indices corresponding to data put in the two positions in the SIMD register.
1113      */
1114     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1115     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1116     int              jnrA,jnrB;
1117     int              j_coord_offsetA,j_coord_offsetB;
1118     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1119     real             rcutoff_scalar;
1120     real             *shiftvec,*fshift,*x,*f;
1121     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1122     int              vdwioffset0;
1123     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1124     int              vdwioffset1;
1125     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1126     int              vdwioffset2;
1127     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1128     int              vdwjidx0A,vdwjidx0B;
1129     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1130     int              vdwjidx1A,vdwjidx1B;
1131     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1132     int              vdwjidx2A,vdwjidx2B;
1133     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1134     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1135     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1136     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1137     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1138     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1139     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1140     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1141     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1142     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1143     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1144     real             *charge;
1145     int              nvdwtype;
1146     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1147     int              *vdwtype;
1148     real             *vdwparam;
1149     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1150     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1151     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1152     real             *vftab;
1153     _fjsp_v2r8       itab_tmp;
1154     _fjsp_v2r8       dummy_mask,cutoff_mask;
1155     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1156     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1157     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1158
1159     x                = xx[0];
1160     f                = ff[0];
1161
1162     nri              = nlist->nri;
1163     iinr             = nlist->iinr;
1164     jindex           = nlist->jindex;
1165     jjnr             = nlist->jjnr;
1166     shiftidx         = nlist->shift;
1167     gid              = nlist->gid;
1168     shiftvec         = fr->shift_vec[0];
1169     fshift           = fr->fshift[0];
1170     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1171     charge           = mdatoms->chargeA;
1172     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1173     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1174     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1175     nvdwtype         = fr->ntype;
1176     vdwparam         = fr->nbfp;
1177     vdwtype          = mdatoms->typeA;
1178
1179     vftab            = kernel_data->table_vdw->data;
1180     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1181
1182     /* Setup water-specific parameters */
1183     inr              = nlist->iinr[0];
1184     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1185     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1186     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1187     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1188
1189     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1190     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1191     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1192     vdwjidx0A        = 2*vdwtype[inr+0];
1193     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1194     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1195     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1196     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1197     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1198     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1199     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1200     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1201     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1202     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1203     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1204
1205     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1206     rcutoff_scalar   = fr->rcoulomb;
1207     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1208     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1209
1210     /* Avoid stupid compiler warnings */
1211     jnrA = jnrB = 0;
1212     j_coord_offsetA = 0;
1213     j_coord_offsetB = 0;
1214
1215     outeriter        = 0;
1216     inneriter        = 0;
1217
1218     /* Start outer loop over neighborlists */
1219     for(iidx=0; iidx<nri; iidx++)
1220     {
1221         /* Load shift vector for this list */
1222         i_shift_offset   = DIM*shiftidx[iidx];
1223
1224         /* Load limits for loop over neighbors */
1225         j_index_start    = jindex[iidx];
1226         j_index_end      = jindex[iidx+1];
1227
1228         /* Get outer coordinate index */
1229         inr              = iinr[iidx];
1230         i_coord_offset   = DIM*inr;
1231
1232         /* Load i particle coords and add shift vector */
1233         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1234                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1235
1236         fix0             = _fjsp_setzero_v2r8();
1237         fiy0             = _fjsp_setzero_v2r8();
1238         fiz0             = _fjsp_setzero_v2r8();
1239         fix1             = _fjsp_setzero_v2r8();
1240         fiy1             = _fjsp_setzero_v2r8();
1241         fiz1             = _fjsp_setzero_v2r8();
1242         fix2             = _fjsp_setzero_v2r8();
1243         fiy2             = _fjsp_setzero_v2r8();
1244         fiz2             = _fjsp_setzero_v2r8();
1245
1246         /* Start inner kernel loop */
1247         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1248         {
1249
1250             /* Get j neighbor index, and coordinate index */
1251             jnrA             = jjnr[jidx];
1252             jnrB             = jjnr[jidx+1];
1253             j_coord_offsetA  = DIM*jnrA;
1254             j_coord_offsetB  = DIM*jnrB;
1255
1256             /* load j atom coordinates */
1257             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1258                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1259
1260             /* Calculate displacement vector */
1261             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1262             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1263             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1264             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1265             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1266             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1267             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1268             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1269             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1270             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1271             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1272             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1273             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1274             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1275             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1276             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1277             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1278             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1279             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1280             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1281             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1282             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1283             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1284             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1285             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1286             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1287             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1288
1289             /* Calculate squared distance and things based on it */
1290             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1291             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1292             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1293             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1294             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1295             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1296             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1297             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1298             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1299
1300             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1301             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1302             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1303             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1304             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1305             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1306             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1307             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1308             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1309
1310             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1311             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1312             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1313             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1314             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1315             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1316             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1317             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1318             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1319
1320             fjx0             = _fjsp_setzero_v2r8();
1321             fjy0             = _fjsp_setzero_v2r8();
1322             fjz0             = _fjsp_setzero_v2r8();
1323             fjx1             = _fjsp_setzero_v2r8();
1324             fjy1             = _fjsp_setzero_v2r8();
1325             fjz1             = _fjsp_setzero_v2r8();
1326             fjx2             = _fjsp_setzero_v2r8();
1327             fjy2             = _fjsp_setzero_v2r8();
1328             fjz2             = _fjsp_setzero_v2r8();
1329
1330             /**************************
1331              * CALCULATE INTERACTIONS *
1332              **************************/
1333
1334             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1335             {
1336
1337             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1338
1339             /* Calculate table index by multiplying r with table scale and truncate to integer */
1340             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1341             itab_tmp         = _fjsp_dtox_v2r8(rt);
1342             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1343             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1344             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1345
1346             vfconv.i[0]     *= 8;
1347             vfconv.i[1]     *= 8;
1348
1349             /* REACTION-FIELD ELECTROSTATICS */
1350             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1351
1352             /* CUBIC SPLINE TABLE DISPERSION */
1353             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1354             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1355             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1356             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1357             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1358             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1359             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1360             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1361             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1362
1363             /* CUBIC SPLINE TABLE REPULSION */
1364             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1365             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1366             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1367             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1368             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1369             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1370             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1371             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1372             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1373             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1374
1375             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1376
1377             fscal            = _fjsp_add_v2r8(felec,fvdw);
1378
1379             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1380
1381             /* Update vectorial force */
1382             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1383             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1384             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1385             
1386             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1387             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1388             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1389
1390             }
1391
1392             /**************************
1393              * CALCULATE INTERACTIONS *
1394              **************************/
1395
1396             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1397             {
1398
1399             /* REACTION-FIELD ELECTROSTATICS */
1400             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1401
1402             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1403
1404             fscal            = felec;
1405
1406             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1407
1408             /* Update vectorial force */
1409             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1410             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1411             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1412             
1413             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1414             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1415             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1416
1417             }
1418
1419             /**************************
1420              * CALCULATE INTERACTIONS *
1421              **************************/
1422
1423             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1424             {
1425
1426             /* REACTION-FIELD ELECTROSTATICS */
1427             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1428
1429             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1430
1431             fscal            = felec;
1432
1433             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1434
1435             /* Update vectorial force */
1436             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1437             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1438             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1439             
1440             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1441             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1442             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1443
1444             }
1445
1446             /**************************
1447              * CALCULATE INTERACTIONS *
1448              **************************/
1449
1450             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1451             {
1452
1453             /* REACTION-FIELD ELECTROSTATICS */
1454             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1455
1456             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1457
1458             fscal            = felec;
1459
1460             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1461
1462             /* Update vectorial force */
1463             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1464             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1465             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1466             
1467             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1468             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1469             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1470
1471             }
1472
1473             /**************************
1474              * CALCULATE INTERACTIONS *
1475              **************************/
1476
1477             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1478             {
1479
1480             /* REACTION-FIELD ELECTROSTATICS */
1481             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1482
1483             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1484
1485             fscal            = felec;
1486
1487             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1488
1489             /* Update vectorial force */
1490             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1491             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1492             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1493             
1494             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1495             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1496             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1497
1498             }
1499
1500             /**************************
1501              * CALCULATE INTERACTIONS *
1502              **************************/
1503
1504             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1505             {
1506
1507             /* REACTION-FIELD ELECTROSTATICS */
1508             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1509
1510             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1511
1512             fscal            = felec;
1513
1514             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1515
1516             /* Update vectorial force */
1517             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1518             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1519             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1520             
1521             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1522             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1523             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1524
1525             }
1526
1527             /**************************
1528              * CALCULATE INTERACTIONS *
1529              **************************/
1530
1531             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1532             {
1533
1534             /* REACTION-FIELD ELECTROSTATICS */
1535             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1536
1537             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1538
1539             fscal            = felec;
1540
1541             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1542
1543             /* Update vectorial force */
1544             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1545             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1546             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1547             
1548             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1549             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1550             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1551
1552             }
1553
1554             /**************************
1555              * CALCULATE INTERACTIONS *
1556              **************************/
1557
1558             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1559             {
1560
1561             /* REACTION-FIELD ELECTROSTATICS */
1562             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1563
1564             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1565
1566             fscal            = felec;
1567
1568             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1569
1570             /* Update vectorial force */
1571             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1572             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1573             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1574             
1575             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1576             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1577             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1578
1579             }
1580
1581             /**************************
1582              * CALCULATE INTERACTIONS *
1583              **************************/
1584
1585             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1586             {
1587
1588             /* REACTION-FIELD ELECTROSTATICS */
1589             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1590
1591             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1592
1593             fscal            = felec;
1594
1595             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1596
1597             /* Update vectorial force */
1598             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1599             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1600             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1601             
1602             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1603             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1604             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1605
1606             }
1607
1608             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1609
1610             /* Inner loop uses 324 flops */
1611         }
1612
1613         if(jidx<j_index_end)
1614         {
1615
1616             jnrA             = jjnr[jidx];
1617             j_coord_offsetA  = DIM*jnrA;
1618
1619             /* load j atom coordinates */
1620             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1621                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1622
1623             /* Calculate displacement vector */
1624             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1625             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1626             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1627             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1628             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1629             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1630             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1631             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1632             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1633             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1634             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1635             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1636             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1637             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1638             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1639             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1640             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1641             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1642             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1643             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1644             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1645             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1646             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1647             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1648             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1649             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1650             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1651
1652             /* Calculate squared distance and things based on it */
1653             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1654             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1655             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1656             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1657             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1658             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1659             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1660             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1661             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1662
1663             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1664             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1665             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1666             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1667             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1668             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1669             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1670             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1671             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1672
1673             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1674             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1675             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1676             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1677             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1678             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1679             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1680             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1681             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1682
1683             fjx0             = _fjsp_setzero_v2r8();
1684             fjy0             = _fjsp_setzero_v2r8();
1685             fjz0             = _fjsp_setzero_v2r8();
1686             fjx1             = _fjsp_setzero_v2r8();
1687             fjy1             = _fjsp_setzero_v2r8();
1688             fjz1             = _fjsp_setzero_v2r8();
1689             fjx2             = _fjsp_setzero_v2r8();
1690             fjy2             = _fjsp_setzero_v2r8();
1691             fjz2             = _fjsp_setzero_v2r8();
1692
1693             /**************************
1694              * CALCULATE INTERACTIONS *
1695              **************************/
1696
1697             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1698             {
1699
1700             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1701
1702             /* Calculate table index by multiplying r with table scale and truncate to integer */
1703             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1704             itab_tmp         = _fjsp_dtox_v2r8(rt);
1705             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1706             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1707             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1708
1709             vfconv.i[0]     *= 8;
1710             vfconv.i[1]     *= 8;
1711
1712             /* REACTION-FIELD ELECTROSTATICS */
1713             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1714
1715             /* CUBIC SPLINE TABLE DISPERSION */
1716             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1717             F                = _fjsp_setzero_v2r8();
1718             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1719             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1720             H                = _fjsp_setzero_v2r8();
1721             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1722             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1723             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1724             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1725
1726             /* CUBIC SPLINE TABLE REPULSION */
1727             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1728             F                = _fjsp_setzero_v2r8();
1729             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1730             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1731             H                = _fjsp_setzero_v2r8();
1732             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1733             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1734             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1735             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1736             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1737
1738             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1739
1740             fscal            = _fjsp_add_v2r8(felec,fvdw);
1741
1742             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1743
1744             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1745
1746             /* Update vectorial force */
1747             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1748             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1749             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1750             
1751             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1752             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1753             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1754
1755             }
1756
1757             /**************************
1758              * CALCULATE INTERACTIONS *
1759              **************************/
1760
1761             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1762             {
1763
1764             /* REACTION-FIELD ELECTROSTATICS */
1765             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1766
1767             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1768
1769             fscal            = felec;
1770
1771             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1772
1773             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1774
1775             /* Update vectorial force */
1776             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1777             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1778             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1779             
1780             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1781             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1782             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1783
1784             }
1785
1786             /**************************
1787              * CALCULATE INTERACTIONS *
1788              **************************/
1789
1790             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1791             {
1792
1793             /* REACTION-FIELD ELECTROSTATICS */
1794             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1795
1796             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1797
1798             fscal            = felec;
1799
1800             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1801
1802             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1803
1804             /* Update vectorial force */
1805             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1806             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1807             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1808             
1809             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1810             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1811             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1812
1813             }
1814
1815             /**************************
1816              * CALCULATE INTERACTIONS *
1817              **************************/
1818
1819             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1820             {
1821
1822             /* REACTION-FIELD ELECTROSTATICS */
1823             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1824
1825             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1826
1827             fscal            = felec;
1828
1829             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1830
1831             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1832
1833             /* Update vectorial force */
1834             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1835             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1836             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1837             
1838             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1839             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1840             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1841
1842             }
1843
1844             /**************************
1845              * CALCULATE INTERACTIONS *
1846              **************************/
1847
1848             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1849             {
1850
1851             /* REACTION-FIELD ELECTROSTATICS */
1852             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1853
1854             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1855
1856             fscal            = felec;
1857
1858             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1859
1860             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1861
1862             /* Update vectorial force */
1863             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1864             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1865             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1866             
1867             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1868             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1869             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1870
1871             }
1872
1873             /**************************
1874              * CALCULATE INTERACTIONS *
1875              **************************/
1876
1877             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1878             {
1879
1880             /* REACTION-FIELD ELECTROSTATICS */
1881             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1882
1883             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1884
1885             fscal            = felec;
1886
1887             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1888
1889             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1890
1891             /* Update vectorial force */
1892             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1893             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1894             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1895             
1896             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1897             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1898             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1899
1900             }
1901
1902             /**************************
1903              * CALCULATE INTERACTIONS *
1904              **************************/
1905
1906             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1907             {
1908
1909             /* REACTION-FIELD ELECTROSTATICS */
1910             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1911
1912             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1913
1914             fscal            = felec;
1915
1916             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1917
1918             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1919
1920             /* Update vectorial force */
1921             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1922             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1923             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1924             
1925             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1926             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1927             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1928
1929             }
1930
1931             /**************************
1932              * CALCULATE INTERACTIONS *
1933              **************************/
1934
1935             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1936             {
1937
1938             /* REACTION-FIELD ELECTROSTATICS */
1939             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1940
1941             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1942
1943             fscal            = felec;
1944
1945             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1946
1947             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1948
1949             /* Update vectorial force */
1950             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1951             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1952             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1953             
1954             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1955             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1956             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1957
1958             }
1959
1960             /**************************
1961              * CALCULATE INTERACTIONS *
1962              **************************/
1963
1964             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1965             {
1966
1967             /* REACTION-FIELD ELECTROSTATICS */
1968             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1969
1970             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1971
1972             fscal            = felec;
1973
1974             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1975
1976             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1977
1978             /* Update vectorial force */
1979             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1980             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1981             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1982             
1983             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1984             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1985             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1986
1987             }
1988
1989             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1990
1991             /* Inner loop uses 324 flops */
1992         }
1993
1994         /* End of innermost loop */
1995
1996         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1997                                               f+i_coord_offset,fshift+i_shift_offset);
1998
1999         /* Increment number of inner iterations */
2000         inneriter                  += j_index_end - j_index_start;
2001
2002         /* Outer loop uses 18 flops */
2003     }
2004
2005     /* Increment number of outer iterations */
2006     outeriter        += nri;
2007
2008     /* Update outer/inner flops */
2009
2010     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*324);
2011 }