Introduce gmxpre.h for truly global definitions
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecCoul_VdwNone_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: Coulomb
54  * VdW interaction:            None
55  * Geometry:                   Water4-Water4
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwioffset3;
86     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     int              vdwjidx3A,vdwjidx3B;
92     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
93     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
94     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
95     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
96     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
97     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
98     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
99     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
100     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
101     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123
124     /* Setup water-specific parameters */
125     inr              = nlist->iinr[0];
126     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
127     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
128     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
129
130     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
131     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
132     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
133     qq11             = _fjsp_mul_v2r8(iq1,jq1);
134     qq12             = _fjsp_mul_v2r8(iq1,jq2);
135     qq13             = _fjsp_mul_v2r8(iq1,jq3);
136     qq21             = _fjsp_mul_v2r8(iq2,jq1);
137     qq22             = _fjsp_mul_v2r8(iq2,jq2);
138     qq23             = _fjsp_mul_v2r8(iq2,jq3);
139     qq31             = _fjsp_mul_v2r8(iq3,jq1);
140     qq32             = _fjsp_mul_v2r8(iq3,jq2);
141     qq33             = _fjsp_mul_v2r8(iq3,jq3);
142
143     /* Avoid stupid compiler warnings */
144     jnrA = jnrB = 0;
145     j_coord_offsetA = 0;
146     j_coord_offsetB = 0;
147
148     outeriter        = 0;
149     inneriter        = 0;
150
151     /* Start outer loop over neighborlists */
152     for(iidx=0; iidx<nri; iidx++)
153     {
154         /* Load shift vector for this list */
155         i_shift_offset   = DIM*shiftidx[iidx];
156
157         /* Load limits for loop over neighbors */
158         j_index_start    = jindex[iidx];
159         j_index_end      = jindex[iidx+1];
160
161         /* Get outer coordinate index */
162         inr              = iinr[iidx];
163         i_coord_offset   = DIM*inr;
164
165         /* Load i particle coords and add shift vector */
166         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
167                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
168
169         fix1             = _fjsp_setzero_v2r8();
170         fiy1             = _fjsp_setzero_v2r8();
171         fiz1             = _fjsp_setzero_v2r8();
172         fix2             = _fjsp_setzero_v2r8();
173         fiy2             = _fjsp_setzero_v2r8();
174         fiz2             = _fjsp_setzero_v2r8();
175         fix3             = _fjsp_setzero_v2r8();
176         fiy3             = _fjsp_setzero_v2r8();
177         fiz3             = _fjsp_setzero_v2r8();
178
179         /* Reset potential sums */
180         velecsum         = _fjsp_setzero_v2r8();
181
182         /* Start inner kernel loop */
183         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
184         {
185
186             /* Get j neighbor index, and coordinate index */
187             jnrA             = jjnr[jidx];
188             jnrB             = jjnr[jidx+1];
189             j_coord_offsetA  = DIM*jnrA;
190             j_coord_offsetB  = DIM*jnrB;
191
192             /* load j atom coordinates */
193             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
194                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
195
196             /* Calculate displacement vector */
197             dx11             = _fjsp_sub_v2r8(ix1,jx1);
198             dy11             = _fjsp_sub_v2r8(iy1,jy1);
199             dz11             = _fjsp_sub_v2r8(iz1,jz1);
200             dx12             = _fjsp_sub_v2r8(ix1,jx2);
201             dy12             = _fjsp_sub_v2r8(iy1,jy2);
202             dz12             = _fjsp_sub_v2r8(iz1,jz2);
203             dx13             = _fjsp_sub_v2r8(ix1,jx3);
204             dy13             = _fjsp_sub_v2r8(iy1,jy3);
205             dz13             = _fjsp_sub_v2r8(iz1,jz3);
206             dx21             = _fjsp_sub_v2r8(ix2,jx1);
207             dy21             = _fjsp_sub_v2r8(iy2,jy1);
208             dz21             = _fjsp_sub_v2r8(iz2,jz1);
209             dx22             = _fjsp_sub_v2r8(ix2,jx2);
210             dy22             = _fjsp_sub_v2r8(iy2,jy2);
211             dz22             = _fjsp_sub_v2r8(iz2,jz2);
212             dx23             = _fjsp_sub_v2r8(ix2,jx3);
213             dy23             = _fjsp_sub_v2r8(iy2,jy3);
214             dz23             = _fjsp_sub_v2r8(iz2,jz3);
215             dx31             = _fjsp_sub_v2r8(ix3,jx1);
216             dy31             = _fjsp_sub_v2r8(iy3,jy1);
217             dz31             = _fjsp_sub_v2r8(iz3,jz1);
218             dx32             = _fjsp_sub_v2r8(ix3,jx2);
219             dy32             = _fjsp_sub_v2r8(iy3,jy2);
220             dz32             = _fjsp_sub_v2r8(iz3,jz2);
221             dx33             = _fjsp_sub_v2r8(ix3,jx3);
222             dy33             = _fjsp_sub_v2r8(iy3,jy3);
223             dz33             = _fjsp_sub_v2r8(iz3,jz3);
224
225             /* Calculate squared distance and things based on it */
226             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
227             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
228             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
229             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
230             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
231             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
232             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
233             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
234             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
235
236             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
237             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
238             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
239             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
240             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
241             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
242             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
243             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
244             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
245
246             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
247             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
248             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
249             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
250             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
251             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
252             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
253             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
254             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
255
256             fjx1             = _fjsp_setzero_v2r8();
257             fjy1             = _fjsp_setzero_v2r8();
258             fjz1             = _fjsp_setzero_v2r8();
259             fjx2             = _fjsp_setzero_v2r8();
260             fjy2             = _fjsp_setzero_v2r8();
261             fjz2             = _fjsp_setzero_v2r8();
262             fjx3             = _fjsp_setzero_v2r8();
263             fjy3             = _fjsp_setzero_v2r8();
264             fjz3             = _fjsp_setzero_v2r8();
265
266             /**************************
267              * CALCULATE INTERACTIONS *
268              **************************/
269
270             /* COULOMB ELECTROSTATICS */
271             velec            = _fjsp_mul_v2r8(qq11,rinv11);
272             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
273
274             /* Update potential sum for this i atom from the interaction with this j atom. */
275             velecsum         = _fjsp_add_v2r8(velecsum,velec);
276
277             fscal            = felec;
278
279             /* Update vectorial force */
280             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
281             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
282             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
283             
284             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
285             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
286             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
287
288             /**************************
289              * CALCULATE INTERACTIONS *
290              **************************/
291
292             /* COULOMB ELECTROSTATICS */
293             velec            = _fjsp_mul_v2r8(qq12,rinv12);
294             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
295
296             /* Update potential sum for this i atom from the interaction with this j atom. */
297             velecsum         = _fjsp_add_v2r8(velecsum,velec);
298
299             fscal            = felec;
300
301             /* Update vectorial force */
302             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
303             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
304             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
305             
306             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
307             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
308             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
309
310             /**************************
311              * CALCULATE INTERACTIONS *
312              **************************/
313
314             /* COULOMB ELECTROSTATICS */
315             velec            = _fjsp_mul_v2r8(qq13,rinv13);
316             felec            = _fjsp_mul_v2r8(velec,rinvsq13);
317
318             /* Update potential sum for this i atom from the interaction with this j atom. */
319             velecsum         = _fjsp_add_v2r8(velecsum,velec);
320
321             fscal            = felec;
322
323             /* Update vectorial force */
324             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
325             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
326             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
327             
328             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
329             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
330             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
331
332             /**************************
333              * CALCULATE INTERACTIONS *
334              **************************/
335
336             /* COULOMB ELECTROSTATICS */
337             velec            = _fjsp_mul_v2r8(qq21,rinv21);
338             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
339
340             /* Update potential sum for this i atom from the interaction with this j atom. */
341             velecsum         = _fjsp_add_v2r8(velecsum,velec);
342
343             fscal            = felec;
344
345             /* Update vectorial force */
346             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
347             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
348             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
349             
350             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
351             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
352             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
353
354             /**************************
355              * CALCULATE INTERACTIONS *
356              **************************/
357
358             /* COULOMB ELECTROSTATICS */
359             velec            = _fjsp_mul_v2r8(qq22,rinv22);
360             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
361
362             /* Update potential sum for this i atom from the interaction with this j atom. */
363             velecsum         = _fjsp_add_v2r8(velecsum,velec);
364
365             fscal            = felec;
366
367             /* Update vectorial force */
368             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
369             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
370             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
371             
372             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
373             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
374             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
375
376             /**************************
377              * CALCULATE INTERACTIONS *
378              **************************/
379
380             /* COULOMB ELECTROSTATICS */
381             velec            = _fjsp_mul_v2r8(qq23,rinv23);
382             felec            = _fjsp_mul_v2r8(velec,rinvsq23);
383
384             /* Update potential sum for this i atom from the interaction with this j atom. */
385             velecsum         = _fjsp_add_v2r8(velecsum,velec);
386
387             fscal            = felec;
388
389             /* Update vectorial force */
390             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
391             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
392             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
393             
394             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
395             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
396             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
397
398             /**************************
399              * CALCULATE INTERACTIONS *
400              **************************/
401
402             /* COULOMB ELECTROSTATICS */
403             velec            = _fjsp_mul_v2r8(qq31,rinv31);
404             felec            = _fjsp_mul_v2r8(velec,rinvsq31);
405
406             /* Update potential sum for this i atom from the interaction with this j atom. */
407             velecsum         = _fjsp_add_v2r8(velecsum,velec);
408
409             fscal            = felec;
410
411             /* Update vectorial force */
412             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
413             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
414             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
415             
416             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
417             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
418             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
419
420             /**************************
421              * CALCULATE INTERACTIONS *
422              **************************/
423
424             /* COULOMB ELECTROSTATICS */
425             velec            = _fjsp_mul_v2r8(qq32,rinv32);
426             felec            = _fjsp_mul_v2r8(velec,rinvsq32);
427
428             /* Update potential sum for this i atom from the interaction with this j atom. */
429             velecsum         = _fjsp_add_v2r8(velecsum,velec);
430
431             fscal            = felec;
432
433             /* Update vectorial force */
434             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
435             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
436             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
437             
438             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
439             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
440             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
441
442             /**************************
443              * CALCULATE INTERACTIONS *
444              **************************/
445
446             /* COULOMB ELECTROSTATICS */
447             velec            = _fjsp_mul_v2r8(qq33,rinv33);
448             felec            = _fjsp_mul_v2r8(velec,rinvsq33);
449
450             /* Update potential sum for this i atom from the interaction with this j atom. */
451             velecsum         = _fjsp_add_v2r8(velecsum,velec);
452
453             fscal            = felec;
454
455             /* Update vectorial force */
456             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
457             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
458             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
459             
460             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
461             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
462             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
463
464             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
465
466             /* Inner loop uses 279 flops */
467         }
468
469         if(jidx<j_index_end)
470         {
471
472             jnrA             = jjnr[jidx];
473             j_coord_offsetA  = DIM*jnrA;
474
475             /* load j atom coordinates */
476             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
477                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
478
479             /* Calculate displacement vector */
480             dx11             = _fjsp_sub_v2r8(ix1,jx1);
481             dy11             = _fjsp_sub_v2r8(iy1,jy1);
482             dz11             = _fjsp_sub_v2r8(iz1,jz1);
483             dx12             = _fjsp_sub_v2r8(ix1,jx2);
484             dy12             = _fjsp_sub_v2r8(iy1,jy2);
485             dz12             = _fjsp_sub_v2r8(iz1,jz2);
486             dx13             = _fjsp_sub_v2r8(ix1,jx3);
487             dy13             = _fjsp_sub_v2r8(iy1,jy3);
488             dz13             = _fjsp_sub_v2r8(iz1,jz3);
489             dx21             = _fjsp_sub_v2r8(ix2,jx1);
490             dy21             = _fjsp_sub_v2r8(iy2,jy1);
491             dz21             = _fjsp_sub_v2r8(iz2,jz1);
492             dx22             = _fjsp_sub_v2r8(ix2,jx2);
493             dy22             = _fjsp_sub_v2r8(iy2,jy2);
494             dz22             = _fjsp_sub_v2r8(iz2,jz2);
495             dx23             = _fjsp_sub_v2r8(ix2,jx3);
496             dy23             = _fjsp_sub_v2r8(iy2,jy3);
497             dz23             = _fjsp_sub_v2r8(iz2,jz3);
498             dx31             = _fjsp_sub_v2r8(ix3,jx1);
499             dy31             = _fjsp_sub_v2r8(iy3,jy1);
500             dz31             = _fjsp_sub_v2r8(iz3,jz1);
501             dx32             = _fjsp_sub_v2r8(ix3,jx2);
502             dy32             = _fjsp_sub_v2r8(iy3,jy2);
503             dz32             = _fjsp_sub_v2r8(iz3,jz2);
504             dx33             = _fjsp_sub_v2r8(ix3,jx3);
505             dy33             = _fjsp_sub_v2r8(iy3,jy3);
506             dz33             = _fjsp_sub_v2r8(iz3,jz3);
507
508             /* Calculate squared distance and things based on it */
509             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
510             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
511             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
512             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
513             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
514             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
515             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
516             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
517             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
518
519             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
520             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
521             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
522             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
523             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
524             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
525             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
526             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
527             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
528
529             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
530             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
531             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
532             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
533             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
534             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
535             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
536             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
537             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
538
539             fjx1             = _fjsp_setzero_v2r8();
540             fjy1             = _fjsp_setzero_v2r8();
541             fjz1             = _fjsp_setzero_v2r8();
542             fjx2             = _fjsp_setzero_v2r8();
543             fjy2             = _fjsp_setzero_v2r8();
544             fjz2             = _fjsp_setzero_v2r8();
545             fjx3             = _fjsp_setzero_v2r8();
546             fjy3             = _fjsp_setzero_v2r8();
547             fjz3             = _fjsp_setzero_v2r8();
548
549             /**************************
550              * CALCULATE INTERACTIONS *
551              **************************/
552
553             /* COULOMB ELECTROSTATICS */
554             velec            = _fjsp_mul_v2r8(qq11,rinv11);
555             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
556
557             /* Update potential sum for this i atom from the interaction with this j atom. */
558             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
559             velecsum         = _fjsp_add_v2r8(velecsum,velec);
560
561             fscal            = felec;
562
563             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
564
565             /* Update vectorial force */
566             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
567             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
568             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
569             
570             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
571             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
572             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
573
574             /**************************
575              * CALCULATE INTERACTIONS *
576              **************************/
577
578             /* COULOMB ELECTROSTATICS */
579             velec            = _fjsp_mul_v2r8(qq12,rinv12);
580             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
581
582             /* Update potential sum for this i atom from the interaction with this j atom. */
583             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
584             velecsum         = _fjsp_add_v2r8(velecsum,velec);
585
586             fscal            = felec;
587
588             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
589
590             /* Update vectorial force */
591             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
592             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
593             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
594             
595             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
596             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
597             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
598
599             /**************************
600              * CALCULATE INTERACTIONS *
601              **************************/
602
603             /* COULOMB ELECTROSTATICS */
604             velec            = _fjsp_mul_v2r8(qq13,rinv13);
605             felec            = _fjsp_mul_v2r8(velec,rinvsq13);
606
607             /* Update potential sum for this i atom from the interaction with this j atom. */
608             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
609             velecsum         = _fjsp_add_v2r8(velecsum,velec);
610
611             fscal            = felec;
612
613             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
614
615             /* Update vectorial force */
616             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
617             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
618             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
619             
620             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
621             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
622             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
623
624             /**************************
625              * CALCULATE INTERACTIONS *
626              **************************/
627
628             /* COULOMB ELECTROSTATICS */
629             velec            = _fjsp_mul_v2r8(qq21,rinv21);
630             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
631
632             /* Update potential sum for this i atom from the interaction with this j atom. */
633             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
634             velecsum         = _fjsp_add_v2r8(velecsum,velec);
635
636             fscal            = felec;
637
638             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
639
640             /* Update vectorial force */
641             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
642             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
643             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
644             
645             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
646             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
647             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
648
649             /**************************
650              * CALCULATE INTERACTIONS *
651              **************************/
652
653             /* COULOMB ELECTROSTATICS */
654             velec            = _fjsp_mul_v2r8(qq22,rinv22);
655             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
656
657             /* Update potential sum for this i atom from the interaction with this j atom. */
658             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
659             velecsum         = _fjsp_add_v2r8(velecsum,velec);
660
661             fscal            = felec;
662
663             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
664
665             /* Update vectorial force */
666             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
667             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
668             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
669             
670             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
671             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
672             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
673
674             /**************************
675              * CALCULATE INTERACTIONS *
676              **************************/
677
678             /* COULOMB ELECTROSTATICS */
679             velec            = _fjsp_mul_v2r8(qq23,rinv23);
680             felec            = _fjsp_mul_v2r8(velec,rinvsq23);
681
682             /* Update potential sum for this i atom from the interaction with this j atom. */
683             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
684             velecsum         = _fjsp_add_v2r8(velecsum,velec);
685
686             fscal            = felec;
687
688             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
689
690             /* Update vectorial force */
691             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
692             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
693             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
694             
695             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
696             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
697             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
698
699             /**************************
700              * CALCULATE INTERACTIONS *
701              **************************/
702
703             /* COULOMB ELECTROSTATICS */
704             velec            = _fjsp_mul_v2r8(qq31,rinv31);
705             felec            = _fjsp_mul_v2r8(velec,rinvsq31);
706
707             /* Update potential sum for this i atom from the interaction with this j atom. */
708             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
709             velecsum         = _fjsp_add_v2r8(velecsum,velec);
710
711             fscal            = felec;
712
713             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
714
715             /* Update vectorial force */
716             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
717             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
718             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
719             
720             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
721             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
722             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
723
724             /**************************
725              * CALCULATE INTERACTIONS *
726              **************************/
727
728             /* COULOMB ELECTROSTATICS */
729             velec            = _fjsp_mul_v2r8(qq32,rinv32);
730             felec            = _fjsp_mul_v2r8(velec,rinvsq32);
731
732             /* Update potential sum for this i atom from the interaction with this j atom. */
733             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
734             velecsum         = _fjsp_add_v2r8(velecsum,velec);
735
736             fscal            = felec;
737
738             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
739
740             /* Update vectorial force */
741             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
742             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
743             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
744             
745             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
746             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
747             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
748
749             /**************************
750              * CALCULATE INTERACTIONS *
751              **************************/
752
753             /* COULOMB ELECTROSTATICS */
754             velec            = _fjsp_mul_v2r8(qq33,rinv33);
755             felec            = _fjsp_mul_v2r8(velec,rinvsq33);
756
757             /* Update potential sum for this i atom from the interaction with this j atom. */
758             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
759             velecsum         = _fjsp_add_v2r8(velecsum,velec);
760
761             fscal            = felec;
762
763             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
764
765             /* Update vectorial force */
766             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
767             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
768             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
769             
770             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
771             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
772             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
773
774             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
775
776             /* Inner loop uses 279 flops */
777         }
778
779         /* End of innermost loop */
780
781         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
782                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
783
784         ggid                        = gid[iidx];
785         /* Update potential energies */
786         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
787
788         /* Increment number of inner iterations */
789         inneriter                  += j_index_end - j_index_start;
790
791         /* Outer loop uses 19 flops */
792     }
793
794     /* Increment number of outer iterations */
795     outeriter        += nri;
796
797     /* Update outer/inner flops */
798
799     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*279);
800 }
801 /*
802  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
803  * Electrostatics interaction: Coulomb
804  * VdW interaction:            None
805  * Geometry:                   Water4-Water4
806  * Calculate force/pot:        Force
807  */
808 void
809 nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
810                     (t_nblist                    * gmx_restrict       nlist,
811                      rvec                        * gmx_restrict          xx,
812                      rvec                        * gmx_restrict          ff,
813                      t_forcerec                  * gmx_restrict          fr,
814                      t_mdatoms                   * gmx_restrict     mdatoms,
815                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
816                      t_nrnb                      * gmx_restrict        nrnb)
817 {
818     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
819      * just 0 for non-waters.
820      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
821      * jnr indices corresponding to data put in the two positions in the SIMD register.
822      */
823     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
824     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
825     int              jnrA,jnrB;
826     int              j_coord_offsetA,j_coord_offsetB;
827     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
828     real             rcutoff_scalar;
829     real             *shiftvec,*fshift,*x,*f;
830     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
831     int              vdwioffset1;
832     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
833     int              vdwioffset2;
834     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
835     int              vdwioffset3;
836     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
837     int              vdwjidx1A,vdwjidx1B;
838     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
839     int              vdwjidx2A,vdwjidx2B;
840     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
841     int              vdwjidx3A,vdwjidx3B;
842     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
843     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
844     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
845     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
846     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
847     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
848     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
849     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
850     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
851     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
852     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
853     real             *charge;
854     _fjsp_v2r8       itab_tmp;
855     _fjsp_v2r8       dummy_mask,cutoff_mask;
856     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
857     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
858     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
859
860     x                = xx[0];
861     f                = ff[0];
862
863     nri              = nlist->nri;
864     iinr             = nlist->iinr;
865     jindex           = nlist->jindex;
866     jjnr             = nlist->jjnr;
867     shiftidx         = nlist->shift;
868     gid              = nlist->gid;
869     shiftvec         = fr->shift_vec[0];
870     fshift           = fr->fshift[0];
871     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
872     charge           = mdatoms->chargeA;
873
874     /* Setup water-specific parameters */
875     inr              = nlist->iinr[0];
876     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
877     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
878     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
879
880     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
881     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
882     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
883     qq11             = _fjsp_mul_v2r8(iq1,jq1);
884     qq12             = _fjsp_mul_v2r8(iq1,jq2);
885     qq13             = _fjsp_mul_v2r8(iq1,jq3);
886     qq21             = _fjsp_mul_v2r8(iq2,jq1);
887     qq22             = _fjsp_mul_v2r8(iq2,jq2);
888     qq23             = _fjsp_mul_v2r8(iq2,jq3);
889     qq31             = _fjsp_mul_v2r8(iq3,jq1);
890     qq32             = _fjsp_mul_v2r8(iq3,jq2);
891     qq33             = _fjsp_mul_v2r8(iq3,jq3);
892
893     /* Avoid stupid compiler warnings */
894     jnrA = jnrB = 0;
895     j_coord_offsetA = 0;
896     j_coord_offsetB = 0;
897
898     outeriter        = 0;
899     inneriter        = 0;
900
901     /* Start outer loop over neighborlists */
902     for(iidx=0; iidx<nri; iidx++)
903     {
904         /* Load shift vector for this list */
905         i_shift_offset   = DIM*shiftidx[iidx];
906
907         /* Load limits for loop over neighbors */
908         j_index_start    = jindex[iidx];
909         j_index_end      = jindex[iidx+1];
910
911         /* Get outer coordinate index */
912         inr              = iinr[iidx];
913         i_coord_offset   = DIM*inr;
914
915         /* Load i particle coords and add shift vector */
916         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
917                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
918
919         fix1             = _fjsp_setzero_v2r8();
920         fiy1             = _fjsp_setzero_v2r8();
921         fiz1             = _fjsp_setzero_v2r8();
922         fix2             = _fjsp_setzero_v2r8();
923         fiy2             = _fjsp_setzero_v2r8();
924         fiz2             = _fjsp_setzero_v2r8();
925         fix3             = _fjsp_setzero_v2r8();
926         fiy3             = _fjsp_setzero_v2r8();
927         fiz3             = _fjsp_setzero_v2r8();
928
929         /* Start inner kernel loop */
930         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
931         {
932
933             /* Get j neighbor index, and coordinate index */
934             jnrA             = jjnr[jidx];
935             jnrB             = jjnr[jidx+1];
936             j_coord_offsetA  = DIM*jnrA;
937             j_coord_offsetB  = DIM*jnrB;
938
939             /* load j atom coordinates */
940             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
941                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
942
943             /* Calculate displacement vector */
944             dx11             = _fjsp_sub_v2r8(ix1,jx1);
945             dy11             = _fjsp_sub_v2r8(iy1,jy1);
946             dz11             = _fjsp_sub_v2r8(iz1,jz1);
947             dx12             = _fjsp_sub_v2r8(ix1,jx2);
948             dy12             = _fjsp_sub_v2r8(iy1,jy2);
949             dz12             = _fjsp_sub_v2r8(iz1,jz2);
950             dx13             = _fjsp_sub_v2r8(ix1,jx3);
951             dy13             = _fjsp_sub_v2r8(iy1,jy3);
952             dz13             = _fjsp_sub_v2r8(iz1,jz3);
953             dx21             = _fjsp_sub_v2r8(ix2,jx1);
954             dy21             = _fjsp_sub_v2r8(iy2,jy1);
955             dz21             = _fjsp_sub_v2r8(iz2,jz1);
956             dx22             = _fjsp_sub_v2r8(ix2,jx2);
957             dy22             = _fjsp_sub_v2r8(iy2,jy2);
958             dz22             = _fjsp_sub_v2r8(iz2,jz2);
959             dx23             = _fjsp_sub_v2r8(ix2,jx3);
960             dy23             = _fjsp_sub_v2r8(iy2,jy3);
961             dz23             = _fjsp_sub_v2r8(iz2,jz3);
962             dx31             = _fjsp_sub_v2r8(ix3,jx1);
963             dy31             = _fjsp_sub_v2r8(iy3,jy1);
964             dz31             = _fjsp_sub_v2r8(iz3,jz1);
965             dx32             = _fjsp_sub_v2r8(ix3,jx2);
966             dy32             = _fjsp_sub_v2r8(iy3,jy2);
967             dz32             = _fjsp_sub_v2r8(iz3,jz2);
968             dx33             = _fjsp_sub_v2r8(ix3,jx3);
969             dy33             = _fjsp_sub_v2r8(iy3,jy3);
970             dz33             = _fjsp_sub_v2r8(iz3,jz3);
971
972             /* Calculate squared distance and things based on it */
973             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
974             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
975             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
976             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
977             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
978             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
979             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
980             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
981             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
982
983             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
984             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
985             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
986             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
987             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
988             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
989             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
990             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
991             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
992
993             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
994             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
995             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
996             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
997             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
998             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
999             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1000             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1001             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1002
1003             fjx1             = _fjsp_setzero_v2r8();
1004             fjy1             = _fjsp_setzero_v2r8();
1005             fjz1             = _fjsp_setzero_v2r8();
1006             fjx2             = _fjsp_setzero_v2r8();
1007             fjy2             = _fjsp_setzero_v2r8();
1008             fjz2             = _fjsp_setzero_v2r8();
1009             fjx3             = _fjsp_setzero_v2r8();
1010             fjy3             = _fjsp_setzero_v2r8();
1011             fjz3             = _fjsp_setzero_v2r8();
1012
1013             /**************************
1014              * CALCULATE INTERACTIONS *
1015              **************************/
1016
1017             /* COULOMB ELECTROSTATICS */
1018             velec            = _fjsp_mul_v2r8(qq11,rinv11);
1019             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
1020
1021             fscal            = felec;
1022
1023             /* Update vectorial force */
1024             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1025             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1026             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1027             
1028             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1029             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1030             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1031
1032             /**************************
1033              * CALCULATE INTERACTIONS *
1034              **************************/
1035
1036             /* COULOMB ELECTROSTATICS */
1037             velec            = _fjsp_mul_v2r8(qq12,rinv12);
1038             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
1039
1040             fscal            = felec;
1041
1042             /* Update vectorial force */
1043             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1044             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1045             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1046             
1047             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1048             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1049             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1050
1051             /**************************
1052              * CALCULATE INTERACTIONS *
1053              **************************/
1054
1055             /* COULOMB ELECTROSTATICS */
1056             velec            = _fjsp_mul_v2r8(qq13,rinv13);
1057             felec            = _fjsp_mul_v2r8(velec,rinvsq13);
1058
1059             fscal            = felec;
1060
1061             /* Update vectorial force */
1062             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1063             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1064             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1065             
1066             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1067             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1068             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1069
1070             /**************************
1071              * CALCULATE INTERACTIONS *
1072              **************************/
1073
1074             /* COULOMB ELECTROSTATICS */
1075             velec            = _fjsp_mul_v2r8(qq21,rinv21);
1076             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
1077
1078             fscal            = felec;
1079
1080             /* Update vectorial force */
1081             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1082             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1083             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1084             
1085             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1086             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1087             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1088
1089             /**************************
1090              * CALCULATE INTERACTIONS *
1091              **************************/
1092
1093             /* COULOMB ELECTROSTATICS */
1094             velec            = _fjsp_mul_v2r8(qq22,rinv22);
1095             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
1096
1097             fscal            = felec;
1098
1099             /* Update vectorial force */
1100             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1101             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1102             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1103             
1104             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1105             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1106             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1107
1108             /**************************
1109              * CALCULATE INTERACTIONS *
1110              **************************/
1111
1112             /* COULOMB ELECTROSTATICS */
1113             velec            = _fjsp_mul_v2r8(qq23,rinv23);
1114             felec            = _fjsp_mul_v2r8(velec,rinvsq23);
1115
1116             fscal            = felec;
1117
1118             /* Update vectorial force */
1119             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1120             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1121             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1122             
1123             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1124             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1125             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1126
1127             /**************************
1128              * CALCULATE INTERACTIONS *
1129              **************************/
1130
1131             /* COULOMB ELECTROSTATICS */
1132             velec            = _fjsp_mul_v2r8(qq31,rinv31);
1133             felec            = _fjsp_mul_v2r8(velec,rinvsq31);
1134
1135             fscal            = felec;
1136
1137             /* Update vectorial force */
1138             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1139             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1140             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1141             
1142             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1143             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1144             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1145
1146             /**************************
1147              * CALCULATE INTERACTIONS *
1148              **************************/
1149
1150             /* COULOMB ELECTROSTATICS */
1151             velec            = _fjsp_mul_v2r8(qq32,rinv32);
1152             felec            = _fjsp_mul_v2r8(velec,rinvsq32);
1153
1154             fscal            = felec;
1155
1156             /* Update vectorial force */
1157             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1158             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1159             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1160             
1161             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1162             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1163             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1164
1165             /**************************
1166              * CALCULATE INTERACTIONS *
1167              **************************/
1168
1169             /* COULOMB ELECTROSTATICS */
1170             velec            = _fjsp_mul_v2r8(qq33,rinv33);
1171             felec            = _fjsp_mul_v2r8(velec,rinvsq33);
1172
1173             fscal            = felec;
1174
1175             /* Update vectorial force */
1176             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1177             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1178             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1179             
1180             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1181             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1182             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1183
1184             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1185
1186             /* Inner loop uses 270 flops */
1187         }
1188
1189         if(jidx<j_index_end)
1190         {
1191
1192             jnrA             = jjnr[jidx];
1193             j_coord_offsetA  = DIM*jnrA;
1194
1195             /* load j atom coordinates */
1196             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
1197                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1198
1199             /* Calculate displacement vector */
1200             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1201             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1202             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1203             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1204             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1205             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1206             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1207             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1208             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1209             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1210             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1211             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1212             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1213             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1214             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1215             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1216             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1217             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1218             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1219             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1220             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1221             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1222             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1223             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1224             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1225             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1226             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1227
1228             /* Calculate squared distance and things based on it */
1229             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1230             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1231             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1232             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1233             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1234             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1235             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1236             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1237             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1238
1239             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1240             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1241             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1242             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1243             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1244             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1245             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1246             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1247             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1248
1249             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1250             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1251             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1252             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1253             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1254             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1255             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1256             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1257             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1258
1259             fjx1             = _fjsp_setzero_v2r8();
1260             fjy1             = _fjsp_setzero_v2r8();
1261             fjz1             = _fjsp_setzero_v2r8();
1262             fjx2             = _fjsp_setzero_v2r8();
1263             fjy2             = _fjsp_setzero_v2r8();
1264             fjz2             = _fjsp_setzero_v2r8();
1265             fjx3             = _fjsp_setzero_v2r8();
1266             fjy3             = _fjsp_setzero_v2r8();
1267             fjz3             = _fjsp_setzero_v2r8();
1268
1269             /**************************
1270              * CALCULATE INTERACTIONS *
1271              **************************/
1272
1273             /* COULOMB ELECTROSTATICS */
1274             velec            = _fjsp_mul_v2r8(qq11,rinv11);
1275             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
1276
1277             fscal            = felec;
1278
1279             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1280
1281             /* Update vectorial force */
1282             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1283             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1284             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1285             
1286             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1287             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1288             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1289
1290             /**************************
1291              * CALCULATE INTERACTIONS *
1292              **************************/
1293
1294             /* COULOMB ELECTROSTATICS */
1295             velec            = _fjsp_mul_v2r8(qq12,rinv12);
1296             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
1297
1298             fscal            = felec;
1299
1300             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1301
1302             /* Update vectorial force */
1303             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1304             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1305             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1306             
1307             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1308             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1309             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1310
1311             /**************************
1312              * CALCULATE INTERACTIONS *
1313              **************************/
1314
1315             /* COULOMB ELECTROSTATICS */
1316             velec            = _fjsp_mul_v2r8(qq13,rinv13);
1317             felec            = _fjsp_mul_v2r8(velec,rinvsq13);
1318
1319             fscal            = felec;
1320
1321             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1322
1323             /* Update vectorial force */
1324             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1325             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1326             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1327             
1328             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1329             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1330             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1331
1332             /**************************
1333              * CALCULATE INTERACTIONS *
1334              **************************/
1335
1336             /* COULOMB ELECTROSTATICS */
1337             velec            = _fjsp_mul_v2r8(qq21,rinv21);
1338             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
1339
1340             fscal            = felec;
1341
1342             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1343
1344             /* Update vectorial force */
1345             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1346             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1347             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1348             
1349             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1350             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1351             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1352
1353             /**************************
1354              * CALCULATE INTERACTIONS *
1355              **************************/
1356
1357             /* COULOMB ELECTROSTATICS */
1358             velec            = _fjsp_mul_v2r8(qq22,rinv22);
1359             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
1360
1361             fscal            = felec;
1362
1363             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1364
1365             /* Update vectorial force */
1366             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1367             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1368             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1369             
1370             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1371             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1372             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1373
1374             /**************************
1375              * CALCULATE INTERACTIONS *
1376              **************************/
1377
1378             /* COULOMB ELECTROSTATICS */
1379             velec            = _fjsp_mul_v2r8(qq23,rinv23);
1380             felec            = _fjsp_mul_v2r8(velec,rinvsq23);
1381
1382             fscal            = felec;
1383
1384             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1385
1386             /* Update vectorial force */
1387             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1388             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1389             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1390             
1391             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1392             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1393             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1394
1395             /**************************
1396              * CALCULATE INTERACTIONS *
1397              **************************/
1398
1399             /* COULOMB ELECTROSTATICS */
1400             velec            = _fjsp_mul_v2r8(qq31,rinv31);
1401             felec            = _fjsp_mul_v2r8(velec,rinvsq31);
1402
1403             fscal            = felec;
1404
1405             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1406
1407             /* Update vectorial force */
1408             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1409             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1410             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1411             
1412             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1413             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1414             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1415
1416             /**************************
1417              * CALCULATE INTERACTIONS *
1418              **************************/
1419
1420             /* COULOMB ELECTROSTATICS */
1421             velec            = _fjsp_mul_v2r8(qq32,rinv32);
1422             felec            = _fjsp_mul_v2r8(velec,rinvsq32);
1423
1424             fscal            = felec;
1425
1426             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1427
1428             /* Update vectorial force */
1429             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1430             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1431             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1432             
1433             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1434             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1435             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1436
1437             /**************************
1438              * CALCULATE INTERACTIONS *
1439              **************************/
1440
1441             /* COULOMB ELECTROSTATICS */
1442             velec            = _fjsp_mul_v2r8(qq33,rinv33);
1443             felec            = _fjsp_mul_v2r8(velec,rinvsq33);
1444
1445             fscal            = felec;
1446
1447             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1448
1449             /* Update vectorial force */
1450             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1451             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1452             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1453             
1454             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1455             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1456             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1457
1458             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1459
1460             /* Inner loop uses 270 flops */
1461         }
1462
1463         /* End of innermost loop */
1464
1465         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1466                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
1467
1468         /* Increment number of inner iterations */
1469         inneriter                  += j_index_end - j_index_start;
1470
1471         /* Outer loop uses 18 flops */
1472     }
1473
1474     /* Increment number of outer iterations */
1475     outeriter        += nri;
1476
1477     /* Update outer/inner flops */
1478
1479     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*270);
1480 }