Introduce gmxpre.h for truly global definitions
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecCoul_VdwNone_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: Coulomb
54  * VdW interaction:            None
55  * Geometry:                   Water3-Water3
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123
124     /* Setup water-specific parameters */
125     inr              = nlist->iinr[0];
126     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
127     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
128     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
129
130     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
131     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
132     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
133     qq00             = _fjsp_mul_v2r8(iq0,jq0);
134     qq01             = _fjsp_mul_v2r8(iq0,jq1);
135     qq02             = _fjsp_mul_v2r8(iq0,jq2);
136     qq10             = _fjsp_mul_v2r8(iq1,jq0);
137     qq11             = _fjsp_mul_v2r8(iq1,jq1);
138     qq12             = _fjsp_mul_v2r8(iq1,jq2);
139     qq20             = _fjsp_mul_v2r8(iq2,jq0);
140     qq21             = _fjsp_mul_v2r8(iq2,jq1);
141     qq22             = _fjsp_mul_v2r8(iq2,jq2);
142
143     /* Avoid stupid compiler warnings */
144     jnrA = jnrB = 0;
145     j_coord_offsetA = 0;
146     j_coord_offsetB = 0;
147
148     outeriter        = 0;
149     inneriter        = 0;
150
151     /* Start outer loop over neighborlists */
152     for(iidx=0; iidx<nri; iidx++)
153     {
154         /* Load shift vector for this list */
155         i_shift_offset   = DIM*shiftidx[iidx];
156
157         /* Load limits for loop over neighbors */
158         j_index_start    = jindex[iidx];
159         j_index_end      = jindex[iidx+1];
160
161         /* Get outer coordinate index */
162         inr              = iinr[iidx];
163         i_coord_offset   = DIM*inr;
164
165         /* Load i particle coords and add shift vector */
166         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
167                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
168
169         fix0             = _fjsp_setzero_v2r8();
170         fiy0             = _fjsp_setzero_v2r8();
171         fiz0             = _fjsp_setzero_v2r8();
172         fix1             = _fjsp_setzero_v2r8();
173         fiy1             = _fjsp_setzero_v2r8();
174         fiz1             = _fjsp_setzero_v2r8();
175         fix2             = _fjsp_setzero_v2r8();
176         fiy2             = _fjsp_setzero_v2r8();
177         fiz2             = _fjsp_setzero_v2r8();
178
179         /* Reset potential sums */
180         velecsum         = _fjsp_setzero_v2r8();
181
182         /* Start inner kernel loop */
183         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
184         {
185
186             /* Get j neighbor index, and coordinate index */
187             jnrA             = jjnr[jidx];
188             jnrB             = jjnr[jidx+1];
189             j_coord_offsetA  = DIM*jnrA;
190             j_coord_offsetB  = DIM*jnrB;
191
192             /* load j atom coordinates */
193             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
194                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
195
196             /* Calculate displacement vector */
197             dx00             = _fjsp_sub_v2r8(ix0,jx0);
198             dy00             = _fjsp_sub_v2r8(iy0,jy0);
199             dz00             = _fjsp_sub_v2r8(iz0,jz0);
200             dx01             = _fjsp_sub_v2r8(ix0,jx1);
201             dy01             = _fjsp_sub_v2r8(iy0,jy1);
202             dz01             = _fjsp_sub_v2r8(iz0,jz1);
203             dx02             = _fjsp_sub_v2r8(ix0,jx2);
204             dy02             = _fjsp_sub_v2r8(iy0,jy2);
205             dz02             = _fjsp_sub_v2r8(iz0,jz2);
206             dx10             = _fjsp_sub_v2r8(ix1,jx0);
207             dy10             = _fjsp_sub_v2r8(iy1,jy0);
208             dz10             = _fjsp_sub_v2r8(iz1,jz0);
209             dx11             = _fjsp_sub_v2r8(ix1,jx1);
210             dy11             = _fjsp_sub_v2r8(iy1,jy1);
211             dz11             = _fjsp_sub_v2r8(iz1,jz1);
212             dx12             = _fjsp_sub_v2r8(ix1,jx2);
213             dy12             = _fjsp_sub_v2r8(iy1,jy2);
214             dz12             = _fjsp_sub_v2r8(iz1,jz2);
215             dx20             = _fjsp_sub_v2r8(ix2,jx0);
216             dy20             = _fjsp_sub_v2r8(iy2,jy0);
217             dz20             = _fjsp_sub_v2r8(iz2,jz0);
218             dx21             = _fjsp_sub_v2r8(ix2,jx1);
219             dy21             = _fjsp_sub_v2r8(iy2,jy1);
220             dz21             = _fjsp_sub_v2r8(iz2,jz1);
221             dx22             = _fjsp_sub_v2r8(ix2,jx2);
222             dy22             = _fjsp_sub_v2r8(iy2,jy2);
223             dz22             = _fjsp_sub_v2r8(iz2,jz2);
224
225             /* Calculate squared distance and things based on it */
226             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
227             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
228             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
229             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
230             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
231             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
232             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
233             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
234             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
235
236             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
237             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
238             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
239             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
240             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
241             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
242             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
243             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
244             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
245
246             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
247             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
248             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
249             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
250             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
251             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
252             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
253             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
254             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
255
256             fjx0             = _fjsp_setzero_v2r8();
257             fjy0             = _fjsp_setzero_v2r8();
258             fjz0             = _fjsp_setzero_v2r8();
259             fjx1             = _fjsp_setzero_v2r8();
260             fjy1             = _fjsp_setzero_v2r8();
261             fjz1             = _fjsp_setzero_v2r8();
262             fjx2             = _fjsp_setzero_v2r8();
263             fjy2             = _fjsp_setzero_v2r8();
264             fjz2             = _fjsp_setzero_v2r8();
265
266             /**************************
267              * CALCULATE INTERACTIONS *
268              **************************/
269
270             /* COULOMB ELECTROSTATICS */
271             velec            = _fjsp_mul_v2r8(qq00,rinv00);
272             felec            = _fjsp_mul_v2r8(velec,rinvsq00);
273
274             /* Update potential sum for this i atom from the interaction with this j atom. */
275             velecsum         = _fjsp_add_v2r8(velecsum,velec);
276
277             fscal            = felec;
278
279             /* Update vectorial force */
280             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
281             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
282             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
283             
284             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
285             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
286             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
287
288             /**************************
289              * CALCULATE INTERACTIONS *
290              **************************/
291
292             /* COULOMB ELECTROSTATICS */
293             velec            = _fjsp_mul_v2r8(qq01,rinv01);
294             felec            = _fjsp_mul_v2r8(velec,rinvsq01);
295
296             /* Update potential sum for this i atom from the interaction with this j atom. */
297             velecsum         = _fjsp_add_v2r8(velecsum,velec);
298
299             fscal            = felec;
300
301             /* Update vectorial force */
302             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
303             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
304             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
305             
306             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
307             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
308             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
309
310             /**************************
311              * CALCULATE INTERACTIONS *
312              **************************/
313
314             /* COULOMB ELECTROSTATICS */
315             velec            = _fjsp_mul_v2r8(qq02,rinv02);
316             felec            = _fjsp_mul_v2r8(velec,rinvsq02);
317
318             /* Update potential sum for this i atom from the interaction with this j atom. */
319             velecsum         = _fjsp_add_v2r8(velecsum,velec);
320
321             fscal            = felec;
322
323             /* Update vectorial force */
324             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
325             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
326             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
327             
328             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
329             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
330             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
331
332             /**************************
333              * CALCULATE INTERACTIONS *
334              **************************/
335
336             /* COULOMB ELECTROSTATICS */
337             velec            = _fjsp_mul_v2r8(qq10,rinv10);
338             felec            = _fjsp_mul_v2r8(velec,rinvsq10);
339
340             /* Update potential sum for this i atom from the interaction with this j atom. */
341             velecsum         = _fjsp_add_v2r8(velecsum,velec);
342
343             fscal            = felec;
344
345             /* Update vectorial force */
346             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
347             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
348             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
349             
350             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
351             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
352             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
353
354             /**************************
355              * CALCULATE INTERACTIONS *
356              **************************/
357
358             /* COULOMB ELECTROSTATICS */
359             velec            = _fjsp_mul_v2r8(qq11,rinv11);
360             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
361
362             /* Update potential sum for this i atom from the interaction with this j atom. */
363             velecsum         = _fjsp_add_v2r8(velecsum,velec);
364
365             fscal            = felec;
366
367             /* Update vectorial force */
368             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
369             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
370             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
371             
372             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
373             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
374             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
375
376             /**************************
377              * CALCULATE INTERACTIONS *
378              **************************/
379
380             /* COULOMB ELECTROSTATICS */
381             velec            = _fjsp_mul_v2r8(qq12,rinv12);
382             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
383
384             /* Update potential sum for this i atom from the interaction with this j atom. */
385             velecsum         = _fjsp_add_v2r8(velecsum,velec);
386
387             fscal            = felec;
388
389             /* Update vectorial force */
390             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
391             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
392             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
393             
394             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
395             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
396             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
397
398             /**************************
399              * CALCULATE INTERACTIONS *
400              **************************/
401
402             /* COULOMB ELECTROSTATICS */
403             velec            = _fjsp_mul_v2r8(qq20,rinv20);
404             felec            = _fjsp_mul_v2r8(velec,rinvsq20);
405
406             /* Update potential sum for this i atom from the interaction with this j atom. */
407             velecsum         = _fjsp_add_v2r8(velecsum,velec);
408
409             fscal            = felec;
410
411             /* Update vectorial force */
412             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
413             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
414             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
415             
416             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
417             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
418             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
419
420             /**************************
421              * CALCULATE INTERACTIONS *
422              **************************/
423
424             /* COULOMB ELECTROSTATICS */
425             velec            = _fjsp_mul_v2r8(qq21,rinv21);
426             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
427
428             /* Update potential sum for this i atom from the interaction with this j atom. */
429             velecsum         = _fjsp_add_v2r8(velecsum,velec);
430
431             fscal            = felec;
432
433             /* Update vectorial force */
434             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
435             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
436             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
437             
438             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
439             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
440             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
441
442             /**************************
443              * CALCULATE INTERACTIONS *
444              **************************/
445
446             /* COULOMB ELECTROSTATICS */
447             velec            = _fjsp_mul_v2r8(qq22,rinv22);
448             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
449
450             /* Update potential sum for this i atom from the interaction with this j atom. */
451             velecsum         = _fjsp_add_v2r8(velecsum,velec);
452
453             fscal            = felec;
454
455             /* Update vectorial force */
456             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
457             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
458             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
459             
460             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
461             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
462             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
463
464             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
465
466             /* Inner loop uses 279 flops */
467         }
468
469         if(jidx<j_index_end)
470         {
471
472             jnrA             = jjnr[jidx];
473             j_coord_offsetA  = DIM*jnrA;
474
475             /* load j atom coordinates */
476             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
477                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
478
479             /* Calculate displacement vector */
480             dx00             = _fjsp_sub_v2r8(ix0,jx0);
481             dy00             = _fjsp_sub_v2r8(iy0,jy0);
482             dz00             = _fjsp_sub_v2r8(iz0,jz0);
483             dx01             = _fjsp_sub_v2r8(ix0,jx1);
484             dy01             = _fjsp_sub_v2r8(iy0,jy1);
485             dz01             = _fjsp_sub_v2r8(iz0,jz1);
486             dx02             = _fjsp_sub_v2r8(ix0,jx2);
487             dy02             = _fjsp_sub_v2r8(iy0,jy2);
488             dz02             = _fjsp_sub_v2r8(iz0,jz2);
489             dx10             = _fjsp_sub_v2r8(ix1,jx0);
490             dy10             = _fjsp_sub_v2r8(iy1,jy0);
491             dz10             = _fjsp_sub_v2r8(iz1,jz0);
492             dx11             = _fjsp_sub_v2r8(ix1,jx1);
493             dy11             = _fjsp_sub_v2r8(iy1,jy1);
494             dz11             = _fjsp_sub_v2r8(iz1,jz1);
495             dx12             = _fjsp_sub_v2r8(ix1,jx2);
496             dy12             = _fjsp_sub_v2r8(iy1,jy2);
497             dz12             = _fjsp_sub_v2r8(iz1,jz2);
498             dx20             = _fjsp_sub_v2r8(ix2,jx0);
499             dy20             = _fjsp_sub_v2r8(iy2,jy0);
500             dz20             = _fjsp_sub_v2r8(iz2,jz0);
501             dx21             = _fjsp_sub_v2r8(ix2,jx1);
502             dy21             = _fjsp_sub_v2r8(iy2,jy1);
503             dz21             = _fjsp_sub_v2r8(iz2,jz1);
504             dx22             = _fjsp_sub_v2r8(ix2,jx2);
505             dy22             = _fjsp_sub_v2r8(iy2,jy2);
506             dz22             = _fjsp_sub_v2r8(iz2,jz2);
507
508             /* Calculate squared distance and things based on it */
509             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
510             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
511             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
512             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
513             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
514             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
515             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
516             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
517             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
518
519             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
520             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
521             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
522             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
523             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
524             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
525             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
526             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
527             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
528
529             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
530             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
531             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
532             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
533             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
534             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
535             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
536             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
537             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
538
539             fjx0             = _fjsp_setzero_v2r8();
540             fjy0             = _fjsp_setzero_v2r8();
541             fjz0             = _fjsp_setzero_v2r8();
542             fjx1             = _fjsp_setzero_v2r8();
543             fjy1             = _fjsp_setzero_v2r8();
544             fjz1             = _fjsp_setzero_v2r8();
545             fjx2             = _fjsp_setzero_v2r8();
546             fjy2             = _fjsp_setzero_v2r8();
547             fjz2             = _fjsp_setzero_v2r8();
548
549             /**************************
550              * CALCULATE INTERACTIONS *
551              **************************/
552
553             /* COULOMB ELECTROSTATICS */
554             velec            = _fjsp_mul_v2r8(qq00,rinv00);
555             felec            = _fjsp_mul_v2r8(velec,rinvsq00);
556
557             /* Update potential sum for this i atom from the interaction with this j atom. */
558             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
559             velecsum         = _fjsp_add_v2r8(velecsum,velec);
560
561             fscal            = felec;
562
563             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
564
565             /* Update vectorial force */
566             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
567             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
568             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
569             
570             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
571             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
572             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
573
574             /**************************
575              * CALCULATE INTERACTIONS *
576              **************************/
577
578             /* COULOMB ELECTROSTATICS */
579             velec            = _fjsp_mul_v2r8(qq01,rinv01);
580             felec            = _fjsp_mul_v2r8(velec,rinvsq01);
581
582             /* Update potential sum for this i atom from the interaction with this j atom. */
583             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
584             velecsum         = _fjsp_add_v2r8(velecsum,velec);
585
586             fscal            = felec;
587
588             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
589
590             /* Update vectorial force */
591             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
592             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
593             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
594             
595             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
596             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
597             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
598
599             /**************************
600              * CALCULATE INTERACTIONS *
601              **************************/
602
603             /* COULOMB ELECTROSTATICS */
604             velec            = _fjsp_mul_v2r8(qq02,rinv02);
605             felec            = _fjsp_mul_v2r8(velec,rinvsq02);
606
607             /* Update potential sum for this i atom from the interaction with this j atom. */
608             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
609             velecsum         = _fjsp_add_v2r8(velecsum,velec);
610
611             fscal            = felec;
612
613             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
614
615             /* Update vectorial force */
616             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
617             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
618             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
619             
620             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
621             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
622             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
623
624             /**************************
625              * CALCULATE INTERACTIONS *
626              **************************/
627
628             /* COULOMB ELECTROSTATICS */
629             velec            = _fjsp_mul_v2r8(qq10,rinv10);
630             felec            = _fjsp_mul_v2r8(velec,rinvsq10);
631
632             /* Update potential sum for this i atom from the interaction with this j atom. */
633             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
634             velecsum         = _fjsp_add_v2r8(velecsum,velec);
635
636             fscal            = felec;
637
638             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
639
640             /* Update vectorial force */
641             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
642             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
643             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
644             
645             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
646             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
647             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
648
649             /**************************
650              * CALCULATE INTERACTIONS *
651              **************************/
652
653             /* COULOMB ELECTROSTATICS */
654             velec            = _fjsp_mul_v2r8(qq11,rinv11);
655             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
656
657             /* Update potential sum for this i atom from the interaction with this j atom. */
658             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
659             velecsum         = _fjsp_add_v2r8(velecsum,velec);
660
661             fscal            = felec;
662
663             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
664
665             /* Update vectorial force */
666             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
667             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
668             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
669             
670             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
671             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
672             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
673
674             /**************************
675              * CALCULATE INTERACTIONS *
676              **************************/
677
678             /* COULOMB ELECTROSTATICS */
679             velec            = _fjsp_mul_v2r8(qq12,rinv12);
680             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
681
682             /* Update potential sum for this i atom from the interaction with this j atom. */
683             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
684             velecsum         = _fjsp_add_v2r8(velecsum,velec);
685
686             fscal            = felec;
687
688             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
689
690             /* Update vectorial force */
691             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
692             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
693             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
694             
695             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
696             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
697             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
698
699             /**************************
700              * CALCULATE INTERACTIONS *
701              **************************/
702
703             /* COULOMB ELECTROSTATICS */
704             velec            = _fjsp_mul_v2r8(qq20,rinv20);
705             felec            = _fjsp_mul_v2r8(velec,rinvsq20);
706
707             /* Update potential sum for this i atom from the interaction with this j atom. */
708             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
709             velecsum         = _fjsp_add_v2r8(velecsum,velec);
710
711             fscal            = felec;
712
713             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
714
715             /* Update vectorial force */
716             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
717             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
718             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
719             
720             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
721             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
722             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
723
724             /**************************
725              * CALCULATE INTERACTIONS *
726              **************************/
727
728             /* COULOMB ELECTROSTATICS */
729             velec            = _fjsp_mul_v2r8(qq21,rinv21);
730             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
731
732             /* Update potential sum for this i atom from the interaction with this j atom. */
733             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
734             velecsum         = _fjsp_add_v2r8(velecsum,velec);
735
736             fscal            = felec;
737
738             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
739
740             /* Update vectorial force */
741             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
742             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
743             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
744             
745             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
746             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
747             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
748
749             /**************************
750              * CALCULATE INTERACTIONS *
751              **************************/
752
753             /* COULOMB ELECTROSTATICS */
754             velec            = _fjsp_mul_v2r8(qq22,rinv22);
755             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
756
757             /* Update potential sum for this i atom from the interaction with this j atom. */
758             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
759             velecsum         = _fjsp_add_v2r8(velecsum,velec);
760
761             fscal            = felec;
762
763             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
764
765             /* Update vectorial force */
766             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
767             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
768             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
769             
770             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
771             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
772             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
773
774             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
775
776             /* Inner loop uses 279 flops */
777         }
778
779         /* End of innermost loop */
780
781         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
782                                               f+i_coord_offset,fshift+i_shift_offset);
783
784         ggid                        = gid[iidx];
785         /* Update potential energies */
786         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
787
788         /* Increment number of inner iterations */
789         inneriter                  += j_index_end - j_index_start;
790
791         /* Outer loop uses 19 flops */
792     }
793
794     /* Increment number of outer iterations */
795     outeriter        += nri;
796
797     /* Update outer/inner flops */
798
799     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*279);
800 }
801 /*
802  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
803  * Electrostatics interaction: Coulomb
804  * VdW interaction:            None
805  * Geometry:                   Water3-Water3
806  * Calculate force/pot:        Force
807  */
808 void
809 nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
810                     (t_nblist                    * gmx_restrict       nlist,
811                      rvec                        * gmx_restrict          xx,
812                      rvec                        * gmx_restrict          ff,
813                      t_forcerec                  * gmx_restrict          fr,
814                      t_mdatoms                   * gmx_restrict     mdatoms,
815                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
816                      t_nrnb                      * gmx_restrict        nrnb)
817 {
818     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
819      * just 0 for non-waters.
820      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
821      * jnr indices corresponding to data put in the two positions in the SIMD register.
822      */
823     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
824     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
825     int              jnrA,jnrB;
826     int              j_coord_offsetA,j_coord_offsetB;
827     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
828     real             rcutoff_scalar;
829     real             *shiftvec,*fshift,*x,*f;
830     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
831     int              vdwioffset0;
832     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
833     int              vdwioffset1;
834     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
835     int              vdwioffset2;
836     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
837     int              vdwjidx0A,vdwjidx0B;
838     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
839     int              vdwjidx1A,vdwjidx1B;
840     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
841     int              vdwjidx2A,vdwjidx2B;
842     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
843     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
844     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
845     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
846     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
847     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
848     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
849     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
850     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
851     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
852     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
853     real             *charge;
854     _fjsp_v2r8       itab_tmp;
855     _fjsp_v2r8       dummy_mask,cutoff_mask;
856     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
857     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
858     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
859
860     x                = xx[0];
861     f                = ff[0];
862
863     nri              = nlist->nri;
864     iinr             = nlist->iinr;
865     jindex           = nlist->jindex;
866     jjnr             = nlist->jjnr;
867     shiftidx         = nlist->shift;
868     gid              = nlist->gid;
869     shiftvec         = fr->shift_vec[0];
870     fshift           = fr->fshift[0];
871     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
872     charge           = mdatoms->chargeA;
873
874     /* Setup water-specific parameters */
875     inr              = nlist->iinr[0];
876     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
877     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
878     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
879
880     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
881     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
882     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
883     qq00             = _fjsp_mul_v2r8(iq0,jq0);
884     qq01             = _fjsp_mul_v2r8(iq0,jq1);
885     qq02             = _fjsp_mul_v2r8(iq0,jq2);
886     qq10             = _fjsp_mul_v2r8(iq1,jq0);
887     qq11             = _fjsp_mul_v2r8(iq1,jq1);
888     qq12             = _fjsp_mul_v2r8(iq1,jq2);
889     qq20             = _fjsp_mul_v2r8(iq2,jq0);
890     qq21             = _fjsp_mul_v2r8(iq2,jq1);
891     qq22             = _fjsp_mul_v2r8(iq2,jq2);
892
893     /* Avoid stupid compiler warnings */
894     jnrA = jnrB = 0;
895     j_coord_offsetA = 0;
896     j_coord_offsetB = 0;
897
898     outeriter        = 0;
899     inneriter        = 0;
900
901     /* Start outer loop over neighborlists */
902     for(iidx=0; iidx<nri; iidx++)
903     {
904         /* Load shift vector for this list */
905         i_shift_offset   = DIM*shiftidx[iidx];
906
907         /* Load limits for loop over neighbors */
908         j_index_start    = jindex[iidx];
909         j_index_end      = jindex[iidx+1];
910
911         /* Get outer coordinate index */
912         inr              = iinr[iidx];
913         i_coord_offset   = DIM*inr;
914
915         /* Load i particle coords and add shift vector */
916         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
917                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
918
919         fix0             = _fjsp_setzero_v2r8();
920         fiy0             = _fjsp_setzero_v2r8();
921         fiz0             = _fjsp_setzero_v2r8();
922         fix1             = _fjsp_setzero_v2r8();
923         fiy1             = _fjsp_setzero_v2r8();
924         fiz1             = _fjsp_setzero_v2r8();
925         fix2             = _fjsp_setzero_v2r8();
926         fiy2             = _fjsp_setzero_v2r8();
927         fiz2             = _fjsp_setzero_v2r8();
928
929         /* Start inner kernel loop */
930         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
931         {
932
933             /* Get j neighbor index, and coordinate index */
934             jnrA             = jjnr[jidx];
935             jnrB             = jjnr[jidx+1];
936             j_coord_offsetA  = DIM*jnrA;
937             j_coord_offsetB  = DIM*jnrB;
938
939             /* load j atom coordinates */
940             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
941                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
942
943             /* Calculate displacement vector */
944             dx00             = _fjsp_sub_v2r8(ix0,jx0);
945             dy00             = _fjsp_sub_v2r8(iy0,jy0);
946             dz00             = _fjsp_sub_v2r8(iz0,jz0);
947             dx01             = _fjsp_sub_v2r8(ix0,jx1);
948             dy01             = _fjsp_sub_v2r8(iy0,jy1);
949             dz01             = _fjsp_sub_v2r8(iz0,jz1);
950             dx02             = _fjsp_sub_v2r8(ix0,jx2);
951             dy02             = _fjsp_sub_v2r8(iy0,jy2);
952             dz02             = _fjsp_sub_v2r8(iz0,jz2);
953             dx10             = _fjsp_sub_v2r8(ix1,jx0);
954             dy10             = _fjsp_sub_v2r8(iy1,jy0);
955             dz10             = _fjsp_sub_v2r8(iz1,jz0);
956             dx11             = _fjsp_sub_v2r8(ix1,jx1);
957             dy11             = _fjsp_sub_v2r8(iy1,jy1);
958             dz11             = _fjsp_sub_v2r8(iz1,jz1);
959             dx12             = _fjsp_sub_v2r8(ix1,jx2);
960             dy12             = _fjsp_sub_v2r8(iy1,jy2);
961             dz12             = _fjsp_sub_v2r8(iz1,jz2);
962             dx20             = _fjsp_sub_v2r8(ix2,jx0);
963             dy20             = _fjsp_sub_v2r8(iy2,jy0);
964             dz20             = _fjsp_sub_v2r8(iz2,jz0);
965             dx21             = _fjsp_sub_v2r8(ix2,jx1);
966             dy21             = _fjsp_sub_v2r8(iy2,jy1);
967             dz21             = _fjsp_sub_v2r8(iz2,jz1);
968             dx22             = _fjsp_sub_v2r8(ix2,jx2);
969             dy22             = _fjsp_sub_v2r8(iy2,jy2);
970             dz22             = _fjsp_sub_v2r8(iz2,jz2);
971
972             /* Calculate squared distance and things based on it */
973             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
974             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
975             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
976             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
977             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
978             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
979             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
980             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
981             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
982
983             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
984             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
985             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
986             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
987             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
988             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
989             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
990             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
991             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
992
993             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
994             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
995             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
996             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
997             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
998             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
999             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1000             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1001             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1002
1003             fjx0             = _fjsp_setzero_v2r8();
1004             fjy0             = _fjsp_setzero_v2r8();
1005             fjz0             = _fjsp_setzero_v2r8();
1006             fjx1             = _fjsp_setzero_v2r8();
1007             fjy1             = _fjsp_setzero_v2r8();
1008             fjz1             = _fjsp_setzero_v2r8();
1009             fjx2             = _fjsp_setzero_v2r8();
1010             fjy2             = _fjsp_setzero_v2r8();
1011             fjz2             = _fjsp_setzero_v2r8();
1012
1013             /**************************
1014              * CALCULATE INTERACTIONS *
1015              **************************/
1016
1017             /* COULOMB ELECTROSTATICS */
1018             velec            = _fjsp_mul_v2r8(qq00,rinv00);
1019             felec            = _fjsp_mul_v2r8(velec,rinvsq00);
1020
1021             fscal            = felec;
1022
1023             /* Update vectorial force */
1024             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1025             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1026             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1027             
1028             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1029             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1030             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1031
1032             /**************************
1033              * CALCULATE INTERACTIONS *
1034              **************************/
1035
1036             /* COULOMB ELECTROSTATICS */
1037             velec            = _fjsp_mul_v2r8(qq01,rinv01);
1038             felec            = _fjsp_mul_v2r8(velec,rinvsq01);
1039
1040             fscal            = felec;
1041
1042             /* Update vectorial force */
1043             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1044             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1045             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1046             
1047             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1048             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1049             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1050
1051             /**************************
1052              * CALCULATE INTERACTIONS *
1053              **************************/
1054
1055             /* COULOMB ELECTROSTATICS */
1056             velec            = _fjsp_mul_v2r8(qq02,rinv02);
1057             felec            = _fjsp_mul_v2r8(velec,rinvsq02);
1058
1059             fscal            = felec;
1060
1061             /* Update vectorial force */
1062             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1063             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1064             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1065             
1066             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1067             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1068             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1069
1070             /**************************
1071              * CALCULATE INTERACTIONS *
1072              **************************/
1073
1074             /* COULOMB ELECTROSTATICS */
1075             velec            = _fjsp_mul_v2r8(qq10,rinv10);
1076             felec            = _fjsp_mul_v2r8(velec,rinvsq10);
1077
1078             fscal            = felec;
1079
1080             /* Update vectorial force */
1081             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1082             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1083             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1084             
1085             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1086             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1087             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1088
1089             /**************************
1090              * CALCULATE INTERACTIONS *
1091              **************************/
1092
1093             /* COULOMB ELECTROSTATICS */
1094             velec            = _fjsp_mul_v2r8(qq11,rinv11);
1095             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
1096
1097             fscal            = felec;
1098
1099             /* Update vectorial force */
1100             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1101             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1102             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1103             
1104             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1105             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1106             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1107
1108             /**************************
1109              * CALCULATE INTERACTIONS *
1110              **************************/
1111
1112             /* COULOMB ELECTROSTATICS */
1113             velec            = _fjsp_mul_v2r8(qq12,rinv12);
1114             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
1115
1116             fscal            = felec;
1117
1118             /* Update vectorial force */
1119             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1120             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1121             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1122             
1123             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1124             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1125             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1126
1127             /**************************
1128              * CALCULATE INTERACTIONS *
1129              **************************/
1130
1131             /* COULOMB ELECTROSTATICS */
1132             velec            = _fjsp_mul_v2r8(qq20,rinv20);
1133             felec            = _fjsp_mul_v2r8(velec,rinvsq20);
1134
1135             fscal            = felec;
1136
1137             /* Update vectorial force */
1138             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1139             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1140             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1141             
1142             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1143             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1144             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1145
1146             /**************************
1147              * CALCULATE INTERACTIONS *
1148              **************************/
1149
1150             /* COULOMB ELECTROSTATICS */
1151             velec            = _fjsp_mul_v2r8(qq21,rinv21);
1152             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
1153
1154             fscal            = felec;
1155
1156             /* Update vectorial force */
1157             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1158             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1159             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1160             
1161             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1162             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1163             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1164
1165             /**************************
1166              * CALCULATE INTERACTIONS *
1167              **************************/
1168
1169             /* COULOMB ELECTROSTATICS */
1170             velec            = _fjsp_mul_v2r8(qq22,rinv22);
1171             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
1172
1173             fscal            = felec;
1174
1175             /* Update vectorial force */
1176             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1177             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1178             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1179             
1180             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1181             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1182             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1183
1184             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1185
1186             /* Inner loop uses 270 flops */
1187         }
1188
1189         if(jidx<j_index_end)
1190         {
1191
1192             jnrA             = jjnr[jidx];
1193             j_coord_offsetA  = DIM*jnrA;
1194
1195             /* load j atom coordinates */
1196             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1197                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1198
1199             /* Calculate displacement vector */
1200             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1201             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1202             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1203             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1204             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1205             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1206             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1207             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1208             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1209             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1210             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1211             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1212             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1213             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1214             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1215             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1216             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1217             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1218             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1219             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1220             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1221             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1222             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1223             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1224             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1225             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1226             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1227
1228             /* Calculate squared distance and things based on it */
1229             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1230             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1231             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1232             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1233             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1234             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1235             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1236             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1237             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1238
1239             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1240             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1241             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1242             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1243             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1244             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1245             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1246             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1247             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1248
1249             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1250             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1251             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1252             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1253             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1254             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1255             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1256             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1257             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1258
1259             fjx0             = _fjsp_setzero_v2r8();
1260             fjy0             = _fjsp_setzero_v2r8();
1261             fjz0             = _fjsp_setzero_v2r8();
1262             fjx1             = _fjsp_setzero_v2r8();
1263             fjy1             = _fjsp_setzero_v2r8();
1264             fjz1             = _fjsp_setzero_v2r8();
1265             fjx2             = _fjsp_setzero_v2r8();
1266             fjy2             = _fjsp_setzero_v2r8();
1267             fjz2             = _fjsp_setzero_v2r8();
1268
1269             /**************************
1270              * CALCULATE INTERACTIONS *
1271              **************************/
1272
1273             /* COULOMB ELECTROSTATICS */
1274             velec            = _fjsp_mul_v2r8(qq00,rinv00);
1275             felec            = _fjsp_mul_v2r8(velec,rinvsq00);
1276
1277             fscal            = felec;
1278
1279             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1280
1281             /* Update vectorial force */
1282             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1283             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1284             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1285             
1286             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1287             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1288             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1289
1290             /**************************
1291              * CALCULATE INTERACTIONS *
1292              **************************/
1293
1294             /* COULOMB ELECTROSTATICS */
1295             velec            = _fjsp_mul_v2r8(qq01,rinv01);
1296             felec            = _fjsp_mul_v2r8(velec,rinvsq01);
1297
1298             fscal            = felec;
1299
1300             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1301
1302             /* Update vectorial force */
1303             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1304             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1305             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1306             
1307             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1308             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1309             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1310
1311             /**************************
1312              * CALCULATE INTERACTIONS *
1313              **************************/
1314
1315             /* COULOMB ELECTROSTATICS */
1316             velec            = _fjsp_mul_v2r8(qq02,rinv02);
1317             felec            = _fjsp_mul_v2r8(velec,rinvsq02);
1318
1319             fscal            = felec;
1320
1321             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1322
1323             /* Update vectorial force */
1324             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1325             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1326             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1327             
1328             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1329             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1330             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1331
1332             /**************************
1333              * CALCULATE INTERACTIONS *
1334              **************************/
1335
1336             /* COULOMB ELECTROSTATICS */
1337             velec            = _fjsp_mul_v2r8(qq10,rinv10);
1338             felec            = _fjsp_mul_v2r8(velec,rinvsq10);
1339
1340             fscal            = felec;
1341
1342             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1343
1344             /* Update vectorial force */
1345             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1346             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1347             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1348             
1349             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1350             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1351             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1352
1353             /**************************
1354              * CALCULATE INTERACTIONS *
1355              **************************/
1356
1357             /* COULOMB ELECTROSTATICS */
1358             velec            = _fjsp_mul_v2r8(qq11,rinv11);
1359             felec            = _fjsp_mul_v2r8(velec,rinvsq11);
1360
1361             fscal            = felec;
1362
1363             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1364
1365             /* Update vectorial force */
1366             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1367             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1368             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1369             
1370             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1371             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1372             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1373
1374             /**************************
1375              * CALCULATE INTERACTIONS *
1376              **************************/
1377
1378             /* COULOMB ELECTROSTATICS */
1379             velec            = _fjsp_mul_v2r8(qq12,rinv12);
1380             felec            = _fjsp_mul_v2r8(velec,rinvsq12);
1381
1382             fscal            = felec;
1383
1384             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1385
1386             /* Update vectorial force */
1387             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1388             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1389             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1390             
1391             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1392             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1393             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1394
1395             /**************************
1396              * CALCULATE INTERACTIONS *
1397              **************************/
1398
1399             /* COULOMB ELECTROSTATICS */
1400             velec            = _fjsp_mul_v2r8(qq20,rinv20);
1401             felec            = _fjsp_mul_v2r8(velec,rinvsq20);
1402
1403             fscal            = felec;
1404
1405             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1406
1407             /* Update vectorial force */
1408             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1409             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1410             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1411             
1412             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1413             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1414             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1415
1416             /**************************
1417              * CALCULATE INTERACTIONS *
1418              **************************/
1419
1420             /* COULOMB ELECTROSTATICS */
1421             velec            = _fjsp_mul_v2r8(qq21,rinv21);
1422             felec            = _fjsp_mul_v2r8(velec,rinvsq21);
1423
1424             fscal            = felec;
1425
1426             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1427
1428             /* Update vectorial force */
1429             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1430             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1431             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1432             
1433             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1434             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1435             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1436
1437             /**************************
1438              * CALCULATE INTERACTIONS *
1439              **************************/
1440
1441             /* COULOMB ELECTROSTATICS */
1442             velec            = _fjsp_mul_v2r8(qq22,rinv22);
1443             felec            = _fjsp_mul_v2r8(velec,rinvsq22);
1444
1445             fscal            = felec;
1446
1447             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1448
1449             /* Update vectorial force */
1450             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1451             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1452             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1453             
1454             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1455             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1456             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1457
1458             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1459
1460             /* Inner loop uses 270 flops */
1461         }
1462
1463         /* End of innermost loop */
1464
1465         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1466                                               f+i_coord_offset,fshift+i_shift_offset);
1467
1468         /* Increment number of inner iterations */
1469         inneriter                  += j_index_end - j_index_start;
1470
1471         /* Outer loop uses 18 flops */
1472     }
1473
1474     /* Increment number of outer iterations */
1475     outeriter        += nri;
1476
1477     /* Update outer/inner flops */
1478
1479     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*270);
1480 }