5e5b2405a9922c2700ee4d7bb3ed39f1ba86902a
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRF_VdwNone_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: ReactionField
52  * VdW interaction:            None
53  * Geometry:                   Water4-Water4
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset1;
80     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
81     int              vdwioffset2;
82     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
83     int              vdwioffset3;
84     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
85     int              vdwjidx1A,vdwjidx1B;
86     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
87     int              vdwjidx2A,vdwjidx2B;
88     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
89     int              vdwjidx3A,vdwjidx3B;
90     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
91     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
92     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
93     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
94     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
95     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
96     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
97     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
98     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
99     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     _fjsp_v2r8       itab_tmp;
103     _fjsp_v2r8       dummy_mask,cutoff_mask;
104     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
105     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
106     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
107
108     x                = xx[0];
109     f                = ff[0];
110
111     nri              = nlist->nri;
112     iinr             = nlist->iinr;
113     jindex           = nlist->jindex;
114     jjnr             = nlist->jjnr;
115     shiftidx         = nlist->shift;
116     gid              = nlist->gid;
117     shiftvec         = fr->shift_vec[0];
118     fshift           = fr->fshift[0];
119     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
120     charge           = mdatoms->chargeA;
121     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
122     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
123     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
124
125     /* Setup water-specific parameters */
126     inr              = nlist->iinr[0];
127     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
128     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
129     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
130
131     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
132     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
133     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
134     qq11             = _fjsp_mul_v2r8(iq1,jq1);
135     qq12             = _fjsp_mul_v2r8(iq1,jq2);
136     qq13             = _fjsp_mul_v2r8(iq1,jq3);
137     qq21             = _fjsp_mul_v2r8(iq2,jq1);
138     qq22             = _fjsp_mul_v2r8(iq2,jq2);
139     qq23             = _fjsp_mul_v2r8(iq2,jq3);
140     qq31             = _fjsp_mul_v2r8(iq3,jq1);
141     qq32             = _fjsp_mul_v2r8(iq3,jq2);
142     qq33             = _fjsp_mul_v2r8(iq3,jq3);
143
144     /* Avoid stupid compiler warnings */
145     jnrA = jnrB = 0;
146     j_coord_offsetA = 0;
147     j_coord_offsetB = 0;
148
149     outeriter        = 0;
150     inneriter        = 0;
151
152     /* Start outer loop over neighborlists */
153     for(iidx=0; iidx<nri; iidx++)
154     {
155         /* Load shift vector for this list */
156         i_shift_offset   = DIM*shiftidx[iidx];
157
158         /* Load limits for loop over neighbors */
159         j_index_start    = jindex[iidx];
160         j_index_end      = jindex[iidx+1];
161
162         /* Get outer coordinate index */
163         inr              = iinr[iidx];
164         i_coord_offset   = DIM*inr;
165
166         /* Load i particle coords and add shift vector */
167         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
168                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
169
170         fix1             = _fjsp_setzero_v2r8();
171         fiy1             = _fjsp_setzero_v2r8();
172         fiz1             = _fjsp_setzero_v2r8();
173         fix2             = _fjsp_setzero_v2r8();
174         fiy2             = _fjsp_setzero_v2r8();
175         fiz2             = _fjsp_setzero_v2r8();
176         fix3             = _fjsp_setzero_v2r8();
177         fiy3             = _fjsp_setzero_v2r8();
178         fiz3             = _fjsp_setzero_v2r8();
179
180         /* Reset potential sums */
181         velecsum         = _fjsp_setzero_v2r8();
182
183         /* Start inner kernel loop */
184         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
185         {
186
187             /* Get j neighbor index, and coordinate index */
188             jnrA             = jjnr[jidx];
189             jnrB             = jjnr[jidx+1];
190             j_coord_offsetA  = DIM*jnrA;
191             j_coord_offsetB  = DIM*jnrB;
192
193             /* load j atom coordinates */
194             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
195                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
196
197             /* Calculate displacement vector */
198             dx11             = _fjsp_sub_v2r8(ix1,jx1);
199             dy11             = _fjsp_sub_v2r8(iy1,jy1);
200             dz11             = _fjsp_sub_v2r8(iz1,jz1);
201             dx12             = _fjsp_sub_v2r8(ix1,jx2);
202             dy12             = _fjsp_sub_v2r8(iy1,jy2);
203             dz12             = _fjsp_sub_v2r8(iz1,jz2);
204             dx13             = _fjsp_sub_v2r8(ix1,jx3);
205             dy13             = _fjsp_sub_v2r8(iy1,jy3);
206             dz13             = _fjsp_sub_v2r8(iz1,jz3);
207             dx21             = _fjsp_sub_v2r8(ix2,jx1);
208             dy21             = _fjsp_sub_v2r8(iy2,jy1);
209             dz21             = _fjsp_sub_v2r8(iz2,jz1);
210             dx22             = _fjsp_sub_v2r8(ix2,jx2);
211             dy22             = _fjsp_sub_v2r8(iy2,jy2);
212             dz22             = _fjsp_sub_v2r8(iz2,jz2);
213             dx23             = _fjsp_sub_v2r8(ix2,jx3);
214             dy23             = _fjsp_sub_v2r8(iy2,jy3);
215             dz23             = _fjsp_sub_v2r8(iz2,jz3);
216             dx31             = _fjsp_sub_v2r8(ix3,jx1);
217             dy31             = _fjsp_sub_v2r8(iy3,jy1);
218             dz31             = _fjsp_sub_v2r8(iz3,jz1);
219             dx32             = _fjsp_sub_v2r8(ix3,jx2);
220             dy32             = _fjsp_sub_v2r8(iy3,jy2);
221             dz32             = _fjsp_sub_v2r8(iz3,jz2);
222             dx33             = _fjsp_sub_v2r8(ix3,jx3);
223             dy33             = _fjsp_sub_v2r8(iy3,jy3);
224             dz33             = _fjsp_sub_v2r8(iz3,jz3);
225
226             /* Calculate squared distance and things based on it */
227             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
228             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
229             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
230             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
231             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
232             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
233             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
234             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
235             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
236
237             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
238             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
239             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
240             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
241             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
242             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
243             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
244             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
245             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
246
247             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
248             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
249             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
250             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
251             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
252             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
253             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
254             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
255             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
256
257             fjx1             = _fjsp_setzero_v2r8();
258             fjy1             = _fjsp_setzero_v2r8();
259             fjz1             = _fjsp_setzero_v2r8();
260             fjx2             = _fjsp_setzero_v2r8();
261             fjy2             = _fjsp_setzero_v2r8();
262             fjz2             = _fjsp_setzero_v2r8();
263             fjx3             = _fjsp_setzero_v2r8();
264             fjy3             = _fjsp_setzero_v2r8();
265             fjz3             = _fjsp_setzero_v2r8();
266
267             /**************************
268              * CALCULATE INTERACTIONS *
269              **************************/
270
271             /* REACTION-FIELD ELECTROSTATICS */
272             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
273             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
274
275             /* Update potential sum for this i atom from the interaction with this j atom. */
276             velecsum         = _fjsp_add_v2r8(velecsum,velec);
277
278             fscal            = felec;
279
280             /* Update vectorial force */
281             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
282             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
283             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
284             
285             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
286             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
287             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
288
289             /**************************
290              * CALCULATE INTERACTIONS *
291              **************************/
292
293             /* REACTION-FIELD ELECTROSTATICS */
294             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
295             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
296
297             /* Update potential sum for this i atom from the interaction with this j atom. */
298             velecsum         = _fjsp_add_v2r8(velecsum,velec);
299
300             fscal            = felec;
301
302             /* Update vectorial force */
303             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
304             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
305             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
306             
307             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
308             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
309             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
310
311             /**************************
312              * CALCULATE INTERACTIONS *
313              **************************/
314
315             /* REACTION-FIELD ELECTROSTATICS */
316             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
317             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
318
319             /* Update potential sum for this i atom from the interaction with this j atom. */
320             velecsum         = _fjsp_add_v2r8(velecsum,velec);
321
322             fscal            = felec;
323
324             /* Update vectorial force */
325             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
326             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
327             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
328             
329             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
330             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
331             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
332
333             /**************************
334              * CALCULATE INTERACTIONS *
335              **************************/
336
337             /* REACTION-FIELD ELECTROSTATICS */
338             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
339             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
340
341             /* Update potential sum for this i atom from the interaction with this j atom. */
342             velecsum         = _fjsp_add_v2r8(velecsum,velec);
343
344             fscal            = felec;
345
346             /* Update vectorial force */
347             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
348             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
349             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
350             
351             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
352             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
353             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
354
355             /**************************
356              * CALCULATE INTERACTIONS *
357              **************************/
358
359             /* REACTION-FIELD ELECTROSTATICS */
360             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
361             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
362
363             /* Update potential sum for this i atom from the interaction with this j atom. */
364             velecsum         = _fjsp_add_v2r8(velecsum,velec);
365
366             fscal            = felec;
367
368             /* Update vectorial force */
369             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
370             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
371             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
372             
373             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
374             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
375             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
376
377             /**************************
378              * CALCULATE INTERACTIONS *
379              **************************/
380
381             /* REACTION-FIELD ELECTROSTATICS */
382             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
383             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
384
385             /* Update potential sum for this i atom from the interaction with this j atom. */
386             velecsum         = _fjsp_add_v2r8(velecsum,velec);
387
388             fscal            = felec;
389
390             /* Update vectorial force */
391             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
392             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
393             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
394             
395             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
396             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
397             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
398
399             /**************************
400              * CALCULATE INTERACTIONS *
401              **************************/
402
403             /* REACTION-FIELD ELECTROSTATICS */
404             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
405             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
406
407             /* Update potential sum for this i atom from the interaction with this j atom. */
408             velecsum         = _fjsp_add_v2r8(velecsum,velec);
409
410             fscal            = felec;
411
412             /* Update vectorial force */
413             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
414             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
415             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
416             
417             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
418             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
419             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
420
421             /**************************
422              * CALCULATE INTERACTIONS *
423              **************************/
424
425             /* REACTION-FIELD ELECTROSTATICS */
426             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
427             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
428
429             /* Update potential sum for this i atom from the interaction with this j atom. */
430             velecsum         = _fjsp_add_v2r8(velecsum,velec);
431
432             fscal            = felec;
433
434             /* Update vectorial force */
435             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
436             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
437             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
438             
439             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
440             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
441             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
442
443             /**************************
444              * CALCULATE INTERACTIONS *
445              **************************/
446
447             /* REACTION-FIELD ELECTROSTATICS */
448             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
449             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
450
451             /* Update potential sum for this i atom from the interaction with this j atom. */
452             velecsum         = _fjsp_add_v2r8(velecsum,velec);
453
454             fscal            = felec;
455
456             /* Update vectorial force */
457             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
458             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
459             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
460             
461             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
462             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
463             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
464
465             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
466
467             /* Inner loop uses 315 flops */
468         }
469
470         if(jidx<j_index_end)
471         {
472
473             jnrA             = jjnr[jidx];
474             j_coord_offsetA  = DIM*jnrA;
475
476             /* load j atom coordinates */
477             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
478                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
479
480             /* Calculate displacement vector */
481             dx11             = _fjsp_sub_v2r8(ix1,jx1);
482             dy11             = _fjsp_sub_v2r8(iy1,jy1);
483             dz11             = _fjsp_sub_v2r8(iz1,jz1);
484             dx12             = _fjsp_sub_v2r8(ix1,jx2);
485             dy12             = _fjsp_sub_v2r8(iy1,jy2);
486             dz12             = _fjsp_sub_v2r8(iz1,jz2);
487             dx13             = _fjsp_sub_v2r8(ix1,jx3);
488             dy13             = _fjsp_sub_v2r8(iy1,jy3);
489             dz13             = _fjsp_sub_v2r8(iz1,jz3);
490             dx21             = _fjsp_sub_v2r8(ix2,jx1);
491             dy21             = _fjsp_sub_v2r8(iy2,jy1);
492             dz21             = _fjsp_sub_v2r8(iz2,jz1);
493             dx22             = _fjsp_sub_v2r8(ix2,jx2);
494             dy22             = _fjsp_sub_v2r8(iy2,jy2);
495             dz22             = _fjsp_sub_v2r8(iz2,jz2);
496             dx23             = _fjsp_sub_v2r8(ix2,jx3);
497             dy23             = _fjsp_sub_v2r8(iy2,jy3);
498             dz23             = _fjsp_sub_v2r8(iz2,jz3);
499             dx31             = _fjsp_sub_v2r8(ix3,jx1);
500             dy31             = _fjsp_sub_v2r8(iy3,jy1);
501             dz31             = _fjsp_sub_v2r8(iz3,jz1);
502             dx32             = _fjsp_sub_v2r8(ix3,jx2);
503             dy32             = _fjsp_sub_v2r8(iy3,jy2);
504             dz32             = _fjsp_sub_v2r8(iz3,jz2);
505             dx33             = _fjsp_sub_v2r8(ix3,jx3);
506             dy33             = _fjsp_sub_v2r8(iy3,jy3);
507             dz33             = _fjsp_sub_v2r8(iz3,jz3);
508
509             /* Calculate squared distance and things based on it */
510             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
511             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
512             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
513             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
514             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
515             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
516             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
517             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
518             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
519
520             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
521             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
522             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
523             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
524             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
525             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
526             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
527             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
528             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
529
530             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
531             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
532             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
533             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
534             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
535             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
536             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
537             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
538             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
539
540             fjx1             = _fjsp_setzero_v2r8();
541             fjy1             = _fjsp_setzero_v2r8();
542             fjz1             = _fjsp_setzero_v2r8();
543             fjx2             = _fjsp_setzero_v2r8();
544             fjy2             = _fjsp_setzero_v2r8();
545             fjz2             = _fjsp_setzero_v2r8();
546             fjx3             = _fjsp_setzero_v2r8();
547             fjy3             = _fjsp_setzero_v2r8();
548             fjz3             = _fjsp_setzero_v2r8();
549
550             /**************************
551              * CALCULATE INTERACTIONS *
552              **************************/
553
554             /* REACTION-FIELD ELECTROSTATICS */
555             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
556             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
557
558             /* Update potential sum for this i atom from the interaction with this j atom. */
559             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
560             velecsum         = _fjsp_add_v2r8(velecsum,velec);
561
562             fscal            = felec;
563
564             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
565
566             /* Update vectorial force */
567             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
568             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
569             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
570             
571             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
572             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
573             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
574
575             /**************************
576              * CALCULATE INTERACTIONS *
577              **************************/
578
579             /* REACTION-FIELD ELECTROSTATICS */
580             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
581             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
582
583             /* Update potential sum for this i atom from the interaction with this j atom. */
584             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
585             velecsum         = _fjsp_add_v2r8(velecsum,velec);
586
587             fscal            = felec;
588
589             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
590
591             /* Update vectorial force */
592             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
593             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
594             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
595             
596             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
597             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
598             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
599
600             /**************************
601              * CALCULATE INTERACTIONS *
602              **************************/
603
604             /* REACTION-FIELD ELECTROSTATICS */
605             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
606             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
607
608             /* Update potential sum for this i atom from the interaction with this j atom. */
609             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
610             velecsum         = _fjsp_add_v2r8(velecsum,velec);
611
612             fscal            = felec;
613
614             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
615
616             /* Update vectorial force */
617             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
618             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
619             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
620             
621             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
622             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
623             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
624
625             /**************************
626              * CALCULATE INTERACTIONS *
627              **************************/
628
629             /* REACTION-FIELD ELECTROSTATICS */
630             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
631             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
632
633             /* Update potential sum for this i atom from the interaction with this j atom. */
634             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
635             velecsum         = _fjsp_add_v2r8(velecsum,velec);
636
637             fscal            = felec;
638
639             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
640
641             /* Update vectorial force */
642             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
643             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
644             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
645             
646             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
647             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
648             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
649
650             /**************************
651              * CALCULATE INTERACTIONS *
652              **************************/
653
654             /* REACTION-FIELD ELECTROSTATICS */
655             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
656             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
657
658             /* Update potential sum for this i atom from the interaction with this j atom. */
659             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
660             velecsum         = _fjsp_add_v2r8(velecsum,velec);
661
662             fscal            = felec;
663
664             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
665
666             /* Update vectorial force */
667             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
668             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
669             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
670             
671             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
672             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
673             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
674
675             /**************************
676              * CALCULATE INTERACTIONS *
677              **************************/
678
679             /* REACTION-FIELD ELECTROSTATICS */
680             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
681             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
682
683             /* Update potential sum for this i atom from the interaction with this j atom. */
684             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
685             velecsum         = _fjsp_add_v2r8(velecsum,velec);
686
687             fscal            = felec;
688
689             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
690
691             /* Update vectorial force */
692             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
693             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
694             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
695             
696             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
697             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
698             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
699
700             /**************************
701              * CALCULATE INTERACTIONS *
702              **************************/
703
704             /* REACTION-FIELD ELECTROSTATICS */
705             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
706             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
707
708             /* Update potential sum for this i atom from the interaction with this j atom. */
709             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
710             velecsum         = _fjsp_add_v2r8(velecsum,velec);
711
712             fscal            = felec;
713
714             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
715
716             /* Update vectorial force */
717             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
718             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
719             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
720             
721             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
722             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
723             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
724
725             /**************************
726              * CALCULATE INTERACTIONS *
727              **************************/
728
729             /* REACTION-FIELD ELECTROSTATICS */
730             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
731             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
732
733             /* Update potential sum for this i atom from the interaction with this j atom. */
734             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
735             velecsum         = _fjsp_add_v2r8(velecsum,velec);
736
737             fscal            = felec;
738
739             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
740
741             /* Update vectorial force */
742             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
743             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
744             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
745             
746             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
747             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
748             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
749
750             /**************************
751              * CALCULATE INTERACTIONS *
752              **************************/
753
754             /* REACTION-FIELD ELECTROSTATICS */
755             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
756             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
757
758             /* Update potential sum for this i atom from the interaction with this j atom. */
759             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
760             velecsum         = _fjsp_add_v2r8(velecsum,velec);
761
762             fscal            = felec;
763
764             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
765
766             /* Update vectorial force */
767             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
768             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
769             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
770             
771             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
772             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
773             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
774
775             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
776
777             /* Inner loop uses 315 flops */
778         }
779
780         /* End of innermost loop */
781
782         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
783                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
784
785         ggid                        = gid[iidx];
786         /* Update potential energies */
787         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
788
789         /* Increment number of inner iterations */
790         inneriter                  += j_index_end - j_index_start;
791
792         /* Outer loop uses 19 flops */
793     }
794
795     /* Increment number of outer iterations */
796     outeriter        += nri;
797
798     /* Update outer/inner flops */
799
800     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*315);
801 }
802 /*
803  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
804  * Electrostatics interaction: ReactionField
805  * VdW interaction:            None
806  * Geometry:                   Water4-Water4
807  * Calculate force/pot:        Force
808  */
809 void
810 nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
811                     (t_nblist                    * gmx_restrict       nlist,
812                      rvec                        * gmx_restrict          xx,
813                      rvec                        * gmx_restrict          ff,
814                      t_forcerec                  * gmx_restrict          fr,
815                      t_mdatoms                   * gmx_restrict     mdatoms,
816                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
817                      t_nrnb                      * gmx_restrict        nrnb)
818 {
819     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
820      * just 0 for non-waters.
821      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
822      * jnr indices corresponding to data put in the two positions in the SIMD register.
823      */
824     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
825     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
826     int              jnrA,jnrB;
827     int              j_coord_offsetA,j_coord_offsetB;
828     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
829     real             rcutoff_scalar;
830     real             *shiftvec,*fshift,*x,*f;
831     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
832     int              vdwioffset1;
833     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
834     int              vdwioffset2;
835     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
836     int              vdwioffset3;
837     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
838     int              vdwjidx1A,vdwjidx1B;
839     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
840     int              vdwjidx2A,vdwjidx2B;
841     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
842     int              vdwjidx3A,vdwjidx3B;
843     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
844     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
845     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
846     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
847     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
848     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
849     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
850     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
851     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
852     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
853     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
854     real             *charge;
855     _fjsp_v2r8       itab_tmp;
856     _fjsp_v2r8       dummy_mask,cutoff_mask;
857     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
858     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
859     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
860
861     x                = xx[0];
862     f                = ff[0];
863
864     nri              = nlist->nri;
865     iinr             = nlist->iinr;
866     jindex           = nlist->jindex;
867     jjnr             = nlist->jjnr;
868     shiftidx         = nlist->shift;
869     gid              = nlist->gid;
870     shiftvec         = fr->shift_vec[0];
871     fshift           = fr->fshift[0];
872     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
873     charge           = mdatoms->chargeA;
874     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
875     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
876     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
877
878     /* Setup water-specific parameters */
879     inr              = nlist->iinr[0];
880     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
881     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
882     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
883
884     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
885     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
886     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
887     qq11             = _fjsp_mul_v2r8(iq1,jq1);
888     qq12             = _fjsp_mul_v2r8(iq1,jq2);
889     qq13             = _fjsp_mul_v2r8(iq1,jq3);
890     qq21             = _fjsp_mul_v2r8(iq2,jq1);
891     qq22             = _fjsp_mul_v2r8(iq2,jq2);
892     qq23             = _fjsp_mul_v2r8(iq2,jq3);
893     qq31             = _fjsp_mul_v2r8(iq3,jq1);
894     qq32             = _fjsp_mul_v2r8(iq3,jq2);
895     qq33             = _fjsp_mul_v2r8(iq3,jq3);
896
897     /* Avoid stupid compiler warnings */
898     jnrA = jnrB = 0;
899     j_coord_offsetA = 0;
900     j_coord_offsetB = 0;
901
902     outeriter        = 0;
903     inneriter        = 0;
904
905     /* Start outer loop over neighborlists */
906     for(iidx=0; iidx<nri; iidx++)
907     {
908         /* Load shift vector for this list */
909         i_shift_offset   = DIM*shiftidx[iidx];
910
911         /* Load limits for loop over neighbors */
912         j_index_start    = jindex[iidx];
913         j_index_end      = jindex[iidx+1];
914
915         /* Get outer coordinate index */
916         inr              = iinr[iidx];
917         i_coord_offset   = DIM*inr;
918
919         /* Load i particle coords and add shift vector */
920         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
921                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
922
923         fix1             = _fjsp_setzero_v2r8();
924         fiy1             = _fjsp_setzero_v2r8();
925         fiz1             = _fjsp_setzero_v2r8();
926         fix2             = _fjsp_setzero_v2r8();
927         fiy2             = _fjsp_setzero_v2r8();
928         fiz2             = _fjsp_setzero_v2r8();
929         fix3             = _fjsp_setzero_v2r8();
930         fiy3             = _fjsp_setzero_v2r8();
931         fiz3             = _fjsp_setzero_v2r8();
932
933         /* Start inner kernel loop */
934         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
935         {
936
937             /* Get j neighbor index, and coordinate index */
938             jnrA             = jjnr[jidx];
939             jnrB             = jjnr[jidx+1];
940             j_coord_offsetA  = DIM*jnrA;
941             j_coord_offsetB  = DIM*jnrB;
942
943             /* load j atom coordinates */
944             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
945                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
946
947             /* Calculate displacement vector */
948             dx11             = _fjsp_sub_v2r8(ix1,jx1);
949             dy11             = _fjsp_sub_v2r8(iy1,jy1);
950             dz11             = _fjsp_sub_v2r8(iz1,jz1);
951             dx12             = _fjsp_sub_v2r8(ix1,jx2);
952             dy12             = _fjsp_sub_v2r8(iy1,jy2);
953             dz12             = _fjsp_sub_v2r8(iz1,jz2);
954             dx13             = _fjsp_sub_v2r8(ix1,jx3);
955             dy13             = _fjsp_sub_v2r8(iy1,jy3);
956             dz13             = _fjsp_sub_v2r8(iz1,jz3);
957             dx21             = _fjsp_sub_v2r8(ix2,jx1);
958             dy21             = _fjsp_sub_v2r8(iy2,jy1);
959             dz21             = _fjsp_sub_v2r8(iz2,jz1);
960             dx22             = _fjsp_sub_v2r8(ix2,jx2);
961             dy22             = _fjsp_sub_v2r8(iy2,jy2);
962             dz22             = _fjsp_sub_v2r8(iz2,jz2);
963             dx23             = _fjsp_sub_v2r8(ix2,jx3);
964             dy23             = _fjsp_sub_v2r8(iy2,jy3);
965             dz23             = _fjsp_sub_v2r8(iz2,jz3);
966             dx31             = _fjsp_sub_v2r8(ix3,jx1);
967             dy31             = _fjsp_sub_v2r8(iy3,jy1);
968             dz31             = _fjsp_sub_v2r8(iz3,jz1);
969             dx32             = _fjsp_sub_v2r8(ix3,jx2);
970             dy32             = _fjsp_sub_v2r8(iy3,jy2);
971             dz32             = _fjsp_sub_v2r8(iz3,jz2);
972             dx33             = _fjsp_sub_v2r8(ix3,jx3);
973             dy33             = _fjsp_sub_v2r8(iy3,jy3);
974             dz33             = _fjsp_sub_v2r8(iz3,jz3);
975
976             /* Calculate squared distance and things based on it */
977             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
978             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
979             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
980             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
981             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
982             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
983             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
984             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
985             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
986
987             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
988             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
989             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
990             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
991             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
992             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
993             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
994             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
995             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
996
997             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
998             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
999             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1000             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1001             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1002             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1003             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1004             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1005             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1006
1007             fjx1             = _fjsp_setzero_v2r8();
1008             fjy1             = _fjsp_setzero_v2r8();
1009             fjz1             = _fjsp_setzero_v2r8();
1010             fjx2             = _fjsp_setzero_v2r8();
1011             fjy2             = _fjsp_setzero_v2r8();
1012             fjz2             = _fjsp_setzero_v2r8();
1013             fjx3             = _fjsp_setzero_v2r8();
1014             fjy3             = _fjsp_setzero_v2r8();
1015             fjz3             = _fjsp_setzero_v2r8();
1016
1017             /**************************
1018              * CALCULATE INTERACTIONS *
1019              **************************/
1020
1021             /* REACTION-FIELD ELECTROSTATICS */
1022             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1023
1024             fscal            = felec;
1025
1026             /* Update vectorial force */
1027             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1028             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1029             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1030             
1031             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1032             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1033             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1034
1035             /**************************
1036              * CALCULATE INTERACTIONS *
1037              **************************/
1038
1039             /* REACTION-FIELD ELECTROSTATICS */
1040             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1041
1042             fscal            = felec;
1043
1044             /* Update vectorial force */
1045             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1046             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1047             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1048             
1049             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1050             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1051             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1052
1053             /**************************
1054              * CALCULATE INTERACTIONS *
1055              **************************/
1056
1057             /* REACTION-FIELD ELECTROSTATICS */
1058             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1059
1060             fscal            = felec;
1061
1062             /* Update vectorial force */
1063             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1064             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1065             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1066             
1067             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1068             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1069             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1070
1071             /**************************
1072              * CALCULATE INTERACTIONS *
1073              **************************/
1074
1075             /* REACTION-FIELD ELECTROSTATICS */
1076             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1077
1078             fscal            = felec;
1079
1080             /* Update vectorial force */
1081             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1082             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1083             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1084             
1085             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1086             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1087             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1088
1089             /**************************
1090              * CALCULATE INTERACTIONS *
1091              **************************/
1092
1093             /* REACTION-FIELD ELECTROSTATICS */
1094             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1095
1096             fscal            = felec;
1097
1098             /* Update vectorial force */
1099             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1100             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1101             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1102             
1103             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1104             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1105             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1106
1107             /**************************
1108              * CALCULATE INTERACTIONS *
1109              **************************/
1110
1111             /* REACTION-FIELD ELECTROSTATICS */
1112             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1113
1114             fscal            = felec;
1115
1116             /* Update vectorial force */
1117             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1118             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1119             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1120             
1121             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1122             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1123             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1124
1125             /**************************
1126              * CALCULATE INTERACTIONS *
1127              **************************/
1128
1129             /* REACTION-FIELD ELECTROSTATICS */
1130             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1131
1132             fscal            = felec;
1133
1134             /* Update vectorial force */
1135             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1136             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1137             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1138             
1139             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1140             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1141             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1142
1143             /**************************
1144              * CALCULATE INTERACTIONS *
1145              **************************/
1146
1147             /* REACTION-FIELD ELECTROSTATICS */
1148             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1149
1150             fscal            = felec;
1151
1152             /* Update vectorial force */
1153             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1154             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1155             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1156             
1157             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1158             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1159             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1160
1161             /**************************
1162              * CALCULATE INTERACTIONS *
1163              **************************/
1164
1165             /* REACTION-FIELD ELECTROSTATICS */
1166             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1167
1168             fscal            = felec;
1169
1170             /* Update vectorial force */
1171             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1172             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1173             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1174             
1175             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1176             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1177             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1178
1179             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1180
1181             /* Inner loop uses 270 flops */
1182         }
1183
1184         if(jidx<j_index_end)
1185         {
1186
1187             jnrA             = jjnr[jidx];
1188             j_coord_offsetA  = DIM*jnrA;
1189
1190             /* load j atom coordinates */
1191             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
1192                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1193
1194             /* Calculate displacement vector */
1195             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1196             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1197             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1198             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1199             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1200             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1201             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1202             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1203             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1204             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1205             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1206             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1207             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1208             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1209             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1210             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1211             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1212             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1213             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1214             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1215             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1216             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1217             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1218             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1219             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1220             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1221             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1222
1223             /* Calculate squared distance and things based on it */
1224             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1225             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1226             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1227             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1228             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1229             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1230             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1231             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1232             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1233
1234             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1235             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1236             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1237             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1238             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1239             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1240             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1241             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1242             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1243
1244             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1245             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1246             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1247             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1248             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1249             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1250             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1251             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1252             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1253
1254             fjx1             = _fjsp_setzero_v2r8();
1255             fjy1             = _fjsp_setzero_v2r8();
1256             fjz1             = _fjsp_setzero_v2r8();
1257             fjx2             = _fjsp_setzero_v2r8();
1258             fjy2             = _fjsp_setzero_v2r8();
1259             fjz2             = _fjsp_setzero_v2r8();
1260             fjx3             = _fjsp_setzero_v2r8();
1261             fjy3             = _fjsp_setzero_v2r8();
1262             fjz3             = _fjsp_setzero_v2r8();
1263
1264             /**************************
1265              * CALCULATE INTERACTIONS *
1266              **************************/
1267
1268             /* REACTION-FIELD ELECTROSTATICS */
1269             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1270
1271             fscal            = felec;
1272
1273             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1274
1275             /* Update vectorial force */
1276             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1277             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1278             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1279             
1280             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1281             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1282             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1283
1284             /**************************
1285              * CALCULATE INTERACTIONS *
1286              **************************/
1287
1288             /* REACTION-FIELD ELECTROSTATICS */
1289             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1290
1291             fscal            = felec;
1292
1293             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1294
1295             /* Update vectorial force */
1296             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1297             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1298             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1299             
1300             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1301             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1302             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1303
1304             /**************************
1305              * CALCULATE INTERACTIONS *
1306              **************************/
1307
1308             /* REACTION-FIELD ELECTROSTATICS */
1309             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1310
1311             fscal            = felec;
1312
1313             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1314
1315             /* Update vectorial force */
1316             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1317             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1318             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1319             
1320             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1321             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1322             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1323
1324             /**************************
1325              * CALCULATE INTERACTIONS *
1326              **************************/
1327
1328             /* REACTION-FIELD ELECTROSTATICS */
1329             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1330
1331             fscal            = felec;
1332
1333             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1334
1335             /* Update vectorial force */
1336             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1337             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1338             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1339             
1340             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1341             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1342             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1343
1344             /**************************
1345              * CALCULATE INTERACTIONS *
1346              **************************/
1347
1348             /* REACTION-FIELD ELECTROSTATICS */
1349             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1350
1351             fscal            = felec;
1352
1353             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1354
1355             /* Update vectorial force */
1356             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1357             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1358             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1359             
1360             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1361             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1362             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1363
1364             /**************************
1365              * CALCULATE INTERACTIONS *
1366              **************************/
1367
1368             /* REACTION-FIELD ELECTROSTATICS */
1369             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1370
1371             fscal            = felec;
1372
1373             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1374
1375             /* Update vectorial force */
1376             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1377             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1378             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1379             
1380             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1381             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1382             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1383
1384             /**************************
1385              * CALCULATE INTERACTIONS *
1386              **************************/
1387
1388             /* REACTION-FIELD ELECTROSTATICS */
1389             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1390
1391             fscal            = felec;
1392
1393             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1394
1395             /* Update vectorial force */
1396             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1397             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1398             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1399             
1400             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1401             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1402             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1403
1404             /**************************
1405              * CALCULATE INTERACTIONS *
1406              **************************/
1407
1408             /* REACTION-FIELD ELECTROSTATICS */
1409             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1410
1411             fscal            = felec;
1412
1413             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1414
1415             /* Update vectorial force */
1416             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1417             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1418             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1419             
1420             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1421             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1422             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1423
1424             /**************************
1425              * CALCULATE INTERACTIONS *
1426              **************************/
1427
1428             /* REACTION-FIELD ELECTROSTATICS */
1429             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1430
1431             fscal            = felec;
1432
1433             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1434
1435             /* Update vectorial force */
1436             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1437             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1438             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1439             
1440             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1441             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1442             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1443
1444             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1445
1446             /* Inner loop uses 270 flops */
1447         }
1448
1449         /* End of innermost loop */
1450
1451         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1452                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
1453
1454         /* Increment number of inner iterations */
1455         inneriter                  += j_index_end - j_index_start;
1456
1457         /* Outer loop uses 18 flops */
1458     }
1459
1460     /* Increment number of outer iterations */
1461     outeriter        += nri;
1462
1463     /* Update outer/inner flops */
1464
1465     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*270);
1466 }