5f0b6c12d445efd10159a86961f92ece921f676d
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRF_VdwNone_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: ReactionField
52  * VdW interaction:            None
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     _fjsp_v2r8       itab_tmp;
103     _fjsp_v2r8       dummy_mask,cutoff_mask;
104     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
105     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
106     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
107
108     x                = xx[0];
109     f                = ff[0];
110
111     nri              = nlist->nri;
112     iinr             = nlist->iinr;
113     jindex           = nlist->jindex;
114     jjnr             = nlist->jjnr;
115     shiftidx         = nlist->shift;
116     gid              = nlist->gid;
117     shiftvec         = fr->shift_vec[0];
118     fshift           = fr->fshift[0];
119     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
120     charge           = mdatoms->chargeA;
121     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
122     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
123     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
124
125     /* Setup water-specific parameters */
126     inr              = nlist->iinr[0];
127     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
128     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
129     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
130
131     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
132     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
133     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
134     qq00             = _fjsp_mul_v2r8(iq0,jq0);
135     qq01             = _fjsp_mul_v2r8(iq0,jq1);
136     qq02             = _fjsp_mul_v2r8(iq0,jq2);
137     qq10             = _fjsp_mul_v2r8(iq1,jq0);
138     qq11             = _fjsp_mul_v2r8(iq1,jq1);
139     qq12             = _fjsp_mul_v2r8(iq1,jq2);
140     qq20             = _fjsp_mul_v2r8(iq2,jq0);
141     qq21             = _fjsp_mul_v2r8(iq2,jq1);
142     qq22             = _fjsp_mul_v2r8(iq2,jq2);
143
144     /* Avoid stupid compiler warnings */
145     jnrA = jnrB = 0;
146     j_coord_offsetA = 0;
147     j_coord_offsetB = 0;
148
149     outeriter        = 0;
150     inneriter        = 0;
151
152     /* Start outer loop over neighborlists */
153     for(iidx=0; iidx<nri; iidx++)
154     {
155         /* Load shift vector for this list */
156         i_shift_offset   = DIM*shiftidx[iidx];
157
158         /* Load limits for loop over neighbors */
159         j_index_start    = jindex[iidx];
160         j_index_end      = jindex[iidx+1];
161
162         /* Get outer coordinate index */
163         inr              = iinr[iidx];
164         i_coord_offset   = DIM*inr;
165
166         /* Load i particle coords and add shift vector */
167         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
168                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
169
170         fix0             = _fjsp_setzero_v2r8();
171         fiy0             = _fjsp_setzero_v2r8();
172         fiz0             = _fjsp_setzero_v2r8();
173         fix1             = _fjsp_setzero_v2r8();
174         fiy1             = _fjsp_setzero_v2r8();
175         fiz1             = _fjsp_setzero_v2r8();
176         fix2             = _fjsp_setzero_v2r8();
177         fiy2             = _fjsp_setzero_v2r8();
178         fiz2             = _fjsp_setzero_v2r8();
179
180         /* Reset potential sums */
181         velecsum         = _fjsp_setzero_v2r8();
182
183         /* Start inner kernel loop */
184         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
185         {
186
187             /* Get j neighbor index, and coordinate index */
188             jnrA             = jjnr[jidx];
189             jnrB             = jjnr[jidx+1];
190             j_coord_offsetA  = DIM*jnrA;
191             j_coord_offsetB  = DIM*jnrB;
192
193             /* load j atom coordinates */
194             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
195                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
196
197             /* Calculate displacement vector */
198             dx00             = _fjsp_sub_v2r8(ix0,jx0);
199             dy00             = _fjsp_sub_v2r8(iy0,jy0);
200             dz00             = _fjsp_sub_v2r8(iz0,jz0);
201             dx01             = _fjsp_sub_v2r8(ix0,jx1);
202             dy01             = _fjsp_sub_v2r8(iy0,jy1);
203             dz01             = _fjsp_sub_v2r8(iz0,jz1);
204             dx02             = _fjsp_sub_v2r8(ix0,jx2);
205             dy02             = _fjsp_sub_v2r8(iy0,jy2);
206             dz02             = _fjsp_sub_v2r8(iz0,jz2);
207             dx10             = _fjsp_sub_v2r8(ix1,jx0);
208             dy10             = _fjsp_sub_v2r8(iy1,jy0);
209             dz10             = _fjsp_sub_v2r8(iz1,jz0);
210             dx11             = _fjsp_sub_v2r8(ix1,jx1);
211             dy11             = _fjsp_sub_v2r8(iy1,jy1);
212             dz11             = _fjsp_sub_v2r8(iz1,jz1);
213             dx12             = _fjsp_sub_v2r8(ix1,jx2);
214             dy12             = _fjsp_sub_v2r8(iy1,jy2);
215             dz12             = _fjsp_sub_v2r8(iz1,jz2);
216             dx20             = _fjsp_sub_v2r8(ix2,jx0);
217             dy20             = _fjsp_sub_v2r8(iy2,jy0);
218             dz20             = _fjsp_sub_v2r8(iz2,jz0);
219             dx21             = _fjsp_sub_v2r8(ix2,jx1);
220             dy21             = _fjsp_sub_v2r8(iy2,jy1);
221             dz21             = _fjsp_sub_v2r8(iz2,jz1);
222             dx22             = _fjsp_sub_v2r8(ix2,jx2);
223             dy22             = _fjsp_sub_v2r8(iy2,jy2);
224             dz22             = _fjsp_sub_v2r8(iz2,jz2);
225
226             /* Calculate squared distance and things based on it */
227             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
228             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
229             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
230             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
231             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
232             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
233             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
234             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
235             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
236
237             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
238             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
239             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
240             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
241             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
242             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
243             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
244             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
245             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
246
247             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
248             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
249             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
250             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
251             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
252             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
253             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
254             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
255             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
256
257             fjx0             = _fjsp_setzero_v2r8();
258             fjy0             = _fjsp_setzero_v2r8();
259             fjz0             = _fjsp_setzero_v2r8();
260             fjx1             = _fjsp_setzero_v2r8();
261             fjy1             = _fjsp_setzero_v2r8();
262             fjz1             = _fjsp_setzero_v2r8();
263             fjx2             = _fjsp_setzero_v2r8();
264             fjy2             = _fjsp_setzero_v2r8();
265             fjz2             = _fjsp_setzero_v2r8();
266
267             /**************************
268              * CALCULATE INTERACTIONS *
269              **************************/
270
271             /* REACTION-FIELD ELECTROSTATICS */
272             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
273             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
274
275             /* Update potential sum for this i atom from the interaction with this j atom. */
276             velecsum         = _fjsp_add_v2r8(velecsum,velec);
277
278             fscal            = felec;
279
280             /* Update vectorial force */
281             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
282             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
283             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
284             
285             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
286             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
287             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
288
289             /**************************
290              * CALCULATE INTERACTIONS *
291              **************************/
292
293             /* REACTION-FIELD ELECTROSTATICS */
294             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
295             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
296
297             /* Update potential sum for this i atom from the interaction with this j atom. */
298             velecsum         = _fjsp_add_v2r8(velecsum,velec);
299
300             fscal            = felec;
301
302             /* Update vectorial force */
303             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
304             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
305             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
306             
307             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
308             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
309             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
310
311             /**************************
312              * CALCULATE INTERACTIONS *
313              **************************/
314
315             /* REACTION-FIELD ELECTROSTATICS */
316             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
317             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
318
319             /* Update potential sum for this i atom from the interaction with this j atom. */
320             velecsum         = _fjsp_add_v2r8(velecsum,velec);
321
322             fscal            = felec;
323
324             /* Update vectorial force */
325             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
326             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
327             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
328             
329             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
330             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
331             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
332
333             /**************************
334              * CALCULATE INTERACTIONS *
335              **************************/
336
337             /* REACTION-FIELD ELECTROSTATICS */
338             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
339             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
340
341             /* Update potential sum for this i atom from the interaction with this j atom. */
342             velecsum         = _fjsp_add_v2r8(velecsum,velec);
343
344             fscal            = felec;
345
346             /* Update vectorial force */
347             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
348             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
349             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
350             
351             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
352             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
353             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
354
355             /**************************
356              * CALCULATE INTERACTIONS *
357              **************************/
358
359             /* REACTION-FIELD ELECTROSTATICS */
360             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
361             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
362
363             /* Update potential sum for this i atom from the interaction with this j atom. */
364             velecsum         = _fjsp_add_v2r8(velecsum,velec);
365
366             fscal            = felec;
367
368             /* Update vectorial force */
369             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
370             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
371             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
372             
373             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
374             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
375             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
376
377             /**************************
378              * CALCULATE INTERACTIONS *
379              **************************/
380
381             /* REACTION-FIELD ELECTROSTATICS */
382             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
383             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
384
385             /* Update potential sum for this i atom from the interaction with this j atom. */
386             velecsum         = _fjsp_add_v2r8(velecsum,velec);
387
388             fscal            = felec;
389
390             /* Update vectorial force */
391             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
392             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
393             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
394             
395             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
396             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
397             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
398
399             /**************************
400              * CALCULATE INTERACTIONS *
401              **************************/
402
403             /* REACTION-FIELD ELECTROSTATICS */
404             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
405             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
406
407             /* Update potential sum for this i atom from the interaction with this j atom. */
408             velecsum         = _fjsp_add_v2r8(velecsum,velec);
409
410             fscal            = felec;
411
412             /* Update vectorial force */
413             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
414             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
415             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
416             
417             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
418             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
419             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
420
421             /**************************
422              * CALCULATE INTERACTIONS *
423              **************************/
424
425             /* REACTION-FIELD ELECTROSTATICS */
426             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
427             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
428
429             /* Update potential sum for this i atom from the interaction with this j atom. */
430             velecsum         = _fjsp_add_v2r8(velecsum,velec);
431
432             fscal            = felec;
433
434             /* Update vectorial force */
435             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
436             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
437             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
438             
439             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
440             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
441             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
442
443             /**************************
444              * CALCULATE INTERACTIONS *
445              **************************/
446
447             /* REACTION-FIELD ELECTROSTATICS */
448             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
449             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
450
451             /* Update potential sum for this i atom from the interaction with this j atom. */
452             velecsum         = _fjsp_add_v2r8(velecsum,velec);
453
454             fscal            = felec;
455
456             /* Update vectorial force */
457             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
458             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
459             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
460             
461             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
462             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
463             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
464
465             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
466
467             /* Inner loop uses 315 flops */
468         }
469
470         if(jidx<j_index_end)
471         {
472
473             jnrA             = jjnr[jidx];
474             j_coord_offsetA  = DIM*jnrA;
475
476             /* load j atom coordinates */
477             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
478                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
479
480             /* Calculate displacement vector */
481             dx00             = _fjsp_sub_v2r8(ix0,jx0);
482             dy00             = _fjsp_sub_v2r8(iy0,jy0);
483             dz00             = _fjsp_sub_v2r8(iz0,jz0);
484             dx01             = _fjsp_sub_v2r8(ix0,jx1);
485             dy01             = _fjsp_sub_v2r8(iy0,jy1);
486             dz01             = _fjsp_sub_v2r8(iz0,jz1);
487             dx02             = _fjsp_sub_v2r8(ix0,jx2);
488             dy02             = _fjsp_sub_v2r8(iy0,jy2);
489             dz02             = _fjsp_sub_v2r8(iz0,jz2);
490             dx10             = _fjsp_sub_v2r8(ix1,jx0);
491             dy10             = _fjsp_sub_v2r8(iy1,jy0);
492             dz10             = _fjsp_sub_v2r8(iz1,jz0);
493             dx11             = _fjsp_sub_v2r8(ix1,jx1);
494             dy11             = _fjsp_sub_v2r8(iy1,jy1);
495             dz11             = _fjsp_sub_v2r8(iz1,jz1);
496             dx12             = _fjsp_sub_v2r8(ix1,jx2);
497             dy12             = _fjsp_sub_v2r8(iy1,jy2);
498             dz12             = _fjsp_sub_v2r8(iz1,jz2);
499             dx20             = _fjsp_sub_v2r8(ix2,jx0);
500             dy20             = _fjsp_sub_v2r8(iy2,jy0);
501             dz20             = _fjsp_sub_v2r8(iz2,jz0);
502             dx21             = _fjsp_sub_v2r8(ix2,jx1);
503             dy21             = _fjsp_sub_v2r8(iy2,jy1);
504             dz21             = _fjsp_sub_v2r8(iz2,jz1);
505             dx22             = _fjsp_sub_v2r8(ix2,jx2);
506             dy22             = _fjsp_sub_v2r8(iy2,jy2);
507             dz22             = _fjsp_sub_v2r8(iz2,jz2);
508
509             /* Calculate squared distance and things based on it */
510             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
511             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
512             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
513             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
514             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
515             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
516             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
517             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
518             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
519
520             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
521             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
522             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
523             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
524             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
525             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
526             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
527             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
528             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
529
530             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
531             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
532             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
533             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
534             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
535             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
536             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
537             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
538             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
539
540             fjx0             = _fjsp_setzero_v2r8();
541             fjy0             = _fjsp_setzero_v2r8();
542             fjz0             = _fjsp_setzero_v2r8();
543             fjx1             = _fjsp_setzero_v2r8();
544             fjy1             = _fjsp_setzero_v2r8();
545             fjz1             = _fjsp_setzero_v2r8();
546             fjx2             = _fjsp_setzero_v2r8();
547             fjy2             = _fjsp_setzero_v2r8();
548             fjz2             = _fjsp_setzero_v2r8();
549
550             /**************************
551              * CALCULATE INTERACTIONS *
552              **************************/
553
554             /* REACTION-FIELD ELECTROSTATICS */
555             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
556             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
557
558             /* Update potential sum for this i atom from the interaction with this j atom. */
559             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
560             velecsum         = _fjsp_add_v2r8(velecsum,velec);
561
562             fscal            = felec;
563
564             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
565
566             /* Update vectorial force */
567             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
568             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
569             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
570             
571             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
572             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
573             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
574
575             /**************************
576              * CALCULATE INTERACTIONS *
577              **************************/
578
579             /* REACTION-FIELD ELECTROSTATICS */
580             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
581             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
582
583             /* Update potential sum for this i atom from the interaction with this j atom. */
584             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
585             velecsum         = _fjsp_add_v2r8(velecsum,velec);
586
587             fscal            = felec;
588
589             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
590
591             /* Update vectorial force */
592             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
593             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
594             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
595             
596             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
597             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
598             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
599
600             /**************************
601              * CALCULATE INTERACTIONS *
602              **************************/
603
604             /* REACTION-FIELD ELECTROSTATICS */
605             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
606             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
607
608             /* Update potential sum for this i atom from the interaction with this j atom. */
609             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
610             velecsum         = _fjsp_add_v2r8(velecsum,velec);
611
612             fscal            = felec;
613
614             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
615
616             /* Update vectorial force */
617             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
618             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
619             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
620             
621             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
622             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
623             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
624
625             /**************************
626              * CALCULATE INTERACTIONS *
627              **************************/
628
629             /* REACTION-FIELD ELECTROSTATICS */
630             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
631             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
632
633             /* Update potential sum for this i atom from the interaction with this j atom. */
634             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
635             velecsum         = _fjsp_add_v2r8(velecsum,velec);
636
637             fscal            = felec;
638
639             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
640
641             /* Update vectorial force */
642             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
643             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
644             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
645             
646             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
647             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
648             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
649
650             /**************************
651              * CALCULATE INTERACTIONS *
652              **************************/
653
654             /* REACTION-FIELD ELECTROSTATICS */
655             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
656             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
657
658             /* Update potential sum for this i atom from the interaction with this j atom. */
659             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
660             velecsum         = _fjsp_add_v2r8(velecsum,velec);
661
662             fscal            = felec;
663
664             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
665
666             /* Update vectorial force */
667             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
668             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
669             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
670             
671             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
672             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
673             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
674
675             /**************************
676              * CALCULATE INTERACTIONS *
677              **************************/
678
679             /* REACTION-FIELD ELECTROSTATICS */
680             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
681             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
682
683             /* Update potential sum for this i atom from the interaction with this j atom. */
684             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
685             velecsum         = _fjsp_add_v2r8(velecsum,velec);
686
687             fscal            = felec;
688
689             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
690
691             /* Update vectorial force */
692             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
693             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
694             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
695             
696             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
697             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
698             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
699
700             /**************************
701              * CALCULATE INTERACTIONS *
702              **************************/
703
704             /* REACTION-FIELD ELECTROSTATICS */
705             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
706             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
707
708             /* Update potential sum for this i atom from the interaction with this j atom. */
709             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
710             velecsum         = _fjsp_add_v2r8(velecsum,velec);
711
712             fscal            = felec;
713
714             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
715
716             /* Update vectorial force */
717             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
718             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
719             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
720             
721             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
722             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
723             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
724
725             /**************************
726              * CALCULATE INTERACTIONS *
727              **************************/
728
729             /* REACTION-FIELD ELECTROSTATICS */
730             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
731             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
732
733             /* Update potential sum for this i atom from the interaction with this j atom. */
734             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
735             velecsum         = _fjsp_add_v2r8(velecsum,velec);
736
737             fscal            = felec;
738
739             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
740
741             /* Update vectorial force */
742             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
743             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
744             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
745             
746             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
747             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
748             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
749
750             /**************************
751              * CALCULATE INTERACTIONS *
752              **************************/
753
754             /* REACTION-FIELD ELECTROSTATICS */
755             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
756             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
757
758             /* Update potential sum for this i atom from the interaction with this j atom. */
759             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
760             velecsum         = _fjsp_add_v2r8(velecsum,velec);
761
762             fscal            = felec;
763
764             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
765
766             /* Update vectorial force */
767             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
768             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
769             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
770             
771             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
772             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
773             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
774
775             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
776
777             /* Inner loop uses 315 flops */
778         }
779
780         /* End of innermost loop */
781
782         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
783                                               f+i_coord_offset,fshift+i_shift_offset);
784
785         ggid                        = gid[iidx];
786         /* Update potential energies */
787         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
788
789         /* Increment number of inner iterations */
790         inneriter                  += j_index_end - j_index_start;
791
792         /* Outer loop uses 19 flops */
793     }
794
795     /* Increment number of outer iterations */
796     outeriter        += nri;
797
798     /* Update outer/inner flops */
799
800     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*315);
801 }
802 /*
803  * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
804  * Electrostatics interaction: ReactionField
805  * VdW interaction:            None
806  * Geometry:                   Water3-Water3
807  * Calculate force/pot:        Force
808  */
809 void
810 nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
811                     (t_nblist                    * gmx_restrict       nlist,
812                      rvec                        * gmx_restrict          xx,
813                      rvec                        * gmx_restrict          ff,
814                      t_forcerec                  * gmx_restrict          fr,
815                      t_mdatoms                   * gmx_restrict     mdatoms,
816                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
817                      t_nrnb                      * gmx_restrict        nrnb)
818 {
819     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
820      * just 0 for non-waters.
821      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
822      * jnr indices corresponding to data put in the two positions in the SIMD register.
823      */
824     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
825     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
826     int              jnrA,jnrB;
827     int              j_coord_offsetA,j_coord_offsetB;
828     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
829     real             rcutoff_scalar;
830     real             *shiftvec,*fshift,*x,*f;
831     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
832     int              vdwioffset0;
833     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
834     int              vdwioffset1;
835     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
836     int              vdwioffset2;
837     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
838     int              vdwjidx0A,vdwjidx0B;
839     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
840     int              vdwjidx1A,vdwjidx1B;
841     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
842     int              vdwjidx2A,vdwjidx2B;
843     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
844     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
845     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
846     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
847     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
848     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
849     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
850     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
851     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
852     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
853     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
854     real             *charge;
855     _fjsp_v2r8       itab_tmp;
856     _fjsp_v2r8       dummy_mask,cutoff_mask;
857     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
858     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
859     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
860
861     x                = xx[0];
862     f                = ff[0];
863
864     nri              = nlist->nri;
865     iinr             = nlist->iinr;
866     jindex           = nlist->jindex;
867     jjnr             = nlist->jjnr;
868     shiftidx         = nlist->shift;
869     gid              = nlist->gid;
870     shiftvec         = fr->shift_vec[0];
871     fshift           = fr->fshift[0];
872     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
873     charge           = mdatoms->chargeA;
874     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
875     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
876     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
877
878     /* Setup water-specific parameters */
879     inr              = nlist->iinr[0];
880     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
881     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
882     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
883
884     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
885     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
886     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
887     qq00             = _fjsp_mul_v2r8(iq0,jq0);
888     qq01             = _fjsp_mul_v2r8(iq0,jq1);
889     qq02             = _fjsp_mul_v2r8(iq0,jq2);
890     qq10             = _fjsp_mul_v2r8(iq1,jq0);
891     qq11             = _fjsp_mul_v2r8(iq1,jq1);
892     qq12             = _fjsp_mul_v2r8(iq1,jq2);
893     qq20             = _fjsp_mul_v2r8(iq2,jq0);
894     qq21             = _fjsp_mul_v2r8(iq2,jq1);
895     qq22             = _fjsp_mul_v2r8(iq2,jq2);
896
897     /* Avoid stupid compiler warnings */
898     jnrA = jnrB = 0;
899     j_coord_offsetA = 0;
900     j_coord_offsetB = 0;
901
902     outeriter        = 0;
903     inneriter        = 0;
904
905     /* Start outer loop over neighborlists */
906     for(iidx=0; iidx<nri; iidx++)
907     {
908         /* Load shift vector for this list */
909         i_shift_offset   = DIM*shiftidx[iidx];
910
911         /* Load limits for loop over neighbors */
912         j_index_start    = jindex[iidx];
913         j_index_end      = jindex[iidx+1];
914
915         /* Get outer coordinate index */
916         inr              = iinr[iidx];
917         i_coord_offset   = DIM*inr;
918
919         /* Load i particle coords and add shift vector */
920         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
921                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
922
923         fix0             = _fjsp_setzero_v2r8();
924         fiy0             = _fjsp_setzero_v2r8();
925         fiz0             = _fjsp_setzero_v2r8();
926         fix1             = _fjsp_setzero_v2r8();
927         fiy1             = _fjsp_setzero_v2r8();
928         fiz1             = _fjsp_setzero_v2r8();
929         fix2             = _fjsp_setzero_v2r8();
930         fiy2             = _fjsp_setzero_v2r8();
931         fiz2             = _fjsp_setzero_v2r8();
932
933         /* Start inner kernel loop */
934         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
935         {
936
937             /* Get j neighbor index, and coordinate index */
938             jnrA             = jjnr[jidx];
939             jnrB             = jjnr[jidx+1];
940             j_coord_offsetA  = DIM*jnrA;
941             j_coord_offsetB  = DIM*jnrB;
942
943             /* load j atom coordinates */
944             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
945                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
946
947             /* Calculate displacement vector */
948             dx00             = _fjsp_sub_v2r8(ix0,jx0);
949             dy00             = _fjsp_sub_v2r8(iy0,jy0);
950             dz00             = _fjsp_sub_v2r8(iz0,jz0);
951             dx01             = _fjsp_sub_v2r8(ix0,jx1);
952             dy01             = _fjsp_sub_v2r8(iy0,jy1);
953             dz01             = _fjsp_sub_v2r8(iz0,jz1);
954             dx02             = _fjsp_sub_v2r8(ix0,jx2);
955             dy02             = _fjsp_sub_v2r8(iy0,jy2);
956             dz02             = _fjsp_sub_v2r8(iz0,jz2);
957             dx10             = _fjsp_sub_v2r8(ix1,jx0);
958             dy10             = _fjsp_sub_v2r8(iy1,jy0);
959             dz10             = _fjsp_sub_v2r8(iz1,jz0);
960             dx11             = _fjsp_sub_v2r8(ix1,jx1);
961             dy11             = _fjsp_sub_v2r8(iy1,jy1);
962             dz11             = _fjsp_sub_v2r8(iz1,jz1);
963             dx12             = _fjsp_sub_v2r8(ix1,jx2);
964             dy12             = _fjsp_sub_v2r8(iy1,jy2);
965             dz12             = _fjsp_sub_v2r8(iz1,jz2);
966             dx20             = _fjsp_sub_v2r8(ix2,jx0);
967             dy20             = _fjsp_sub_v2r8(iy2,jy0);
968             dz20             = _fjsp_sub_v2r8(iz2,jz0);
969             dx21             = _fjsp_sub_v2r8(ix2,jx1);
970             dy21             = _fjsp_sub_v2r8(iy2,jy1);
971             dz21             = _fjsp_sub_v2r8(iz2,jz1);
972             dx22             = _fjsp_sub_v2r8(ix2,jx2);
973             dy22             = _fjsp_sub_v2r8(iy2,jy2);
974             dz22             = _fjsp_sub_v2r8(iz2,jz2);
975
976             /* Calculate squared distance and things based on it */
977             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
978             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
979             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
980             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
981             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
982             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
983             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
984             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
985             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
986
987             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
988             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
989             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
990             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
991             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
992             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
993             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
994             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
995             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
996
997             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
998             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
999             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1000             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1001             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1002             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1003             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1004             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1005             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1006
1007             fjx0             = _fjsp_setzero_v2r8();
1008             fjy0             = _fjsp_setzero_v2r8();
1009             fjz0             = _fjsp_setzero_v2r8();
1010             fjx1             = _fjsp_setzero_v2r8();
1011             fjy1             = _fjsp_setzero_v2r8();
1012             fjz1             = _fjsp_setzero_v2r8();
1013             fjx2             = _fjsp_setzero_v2r8();
1014             fjy2             = _fjsp_setzero_v2r8();
1015             fjz2             = _fjsp_setzero_v2r8();
1016
1017             /**************************
1018              * CALCULATE INTERACTIONS *
1019              **************************/
1020
1021             /* REACTION-FIELD ELECTROSTATICS */
1022             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1023
1024             fscal            = felec;
1025
1026             /* Update vectorial force */
1027             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1028             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1029             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1030             
1031             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1032             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1033             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1034
1035             /**************************
1036              * CALCULATE INTERACTIONS *
1037              **************************/
1038
1039             /* REACTION-FIELD ELECTROSTATICS */
1040             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1041
1042             fscal            = felec;
1043
1044             /* Update vectorial force */
1045             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1046             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1047             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1048             
1049             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1050             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1051             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1052
1053             /**************************
1054              * CALCULATE INTERACTIONS *
1055              **************************/
1056
1057             /* REACTION-FIELD ELECTROSTATICS */
1058             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1059
1060             fscal            = felec;
1061
1062             /* Update vectorial force */
1063             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1064             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1065             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1066             
1067             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1068             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1069             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1070
1071             /**************************
1072              * CALCULATE INTERACTIONS *
1073              **************************/
1074
1075             /* REACTION-FIELD ELECTROSTATICS */
1076             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1077
1078             fscal            = felec;
1079
1080             /* Update vectorial force */
1081             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1082             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1083             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1084             
1085             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1086             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1087             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1088
1089             /**************************
1090              * CALCULATE INTERACTIONS *
1091              **************************/
1092
1093             /* REACTION-FIELD ELECTROSTATICS */
1094             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1095
1096             fscal            = felec;
1097
1098             /* Update vectorial force */
1099             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1100             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1101             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1102             
1103             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1104             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1105             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1106
1107             /**************************
1108              * CALCULATE INTERACTIONS *
1109              **************************/
1110
1111             /* REACTION-FIELD ELECTROSTATICS */
1112             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1113
1114             fscal            = felec;
1115
1116             /* Update vectorial force */
1117             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1118             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1119             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1120             
1121             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1122             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1123             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1124
1125             /**************************
1126              * CALCULATE INTERACTIONS *
1127              **************************/
1128
1129             /* REACTION-FIELD ELECTROSTATICS */
1130             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1131
1132             fscal            = felec;
1133
1134             /* Update vectorial force */
1135             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1136             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1137             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1138             
1139             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1140             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1141             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1142
1143             /**************************
1144              * CALCULATE INTERACTIONS *
1145              **************************/
1146
1147             /* REACTION-FIELD ELECTROSTATICS */
1148             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1149
1150             fscal            = felec;
1151
1152             /* Update vectorial force */
1153             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1154             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1155             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1156             
1157             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1158             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1159             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1160
1161             /**************************
1162              * CALCULATE INTERACTIONS *
1163              **************************/
1164
1165             /* REACTION-FIELD ELECTROSTATICS */
1166             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1167
1168             fscal            = felec;
1169
1170             /* Update vectorial force */
1171             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1172             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1173             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1174             
1175             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1176             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1177             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1178
1179             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1180
1181             /* Inner loop uses 270 flops */
1182         }
1183
1184         if(jidx<j_index_end)
1185         {
1186
1187             jnrA             = jjnr[jidx];
1188             j_coord_offsetA  = DIM*jnrA;
1189
1190             /* load j atom coordinates */
1191             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1192                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1193
1194             /* Calculate displacement vector */
1195             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1196             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1197             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1198             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1199             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1200             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1201             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1202             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1203             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1204             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1205             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1206             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1207             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1208             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1209             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1210             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1211             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1212             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1213             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1214             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1215             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1216             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1217             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1218             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1219             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1220             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1221             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1222
1223             /* Calculate squared distance and things based on it */
1224             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1225             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1226             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1227             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1228             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1229             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1230             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1231             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1232             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1233
1234             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1235             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1236             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1237             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1238             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1239             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1240             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1241             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1242             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1243
1244             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1245             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1246             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1247             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1248             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1249             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1250             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1251             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1252             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1253
1254             fjx0             = _fjsp_setzero_v2r8();
1255             fjy0             = _fjsp_setzero_v2r8();
1256             fjz0             = _fjsp_setzero_v2r8();
1257             fjx1             = _fjsp_setzero_v2r8();
1258             fjy1             = _fjsp_setzero_v2r8();
1259             fjz1             = _fjsp_setzero_v2r8();
1260             fjx2             = _fjsp_setzero_v2r8();
1261             fjy2             = _fjsp_setzero_v2r8();
1262             fjz2             = _fjsp_setzero_v2r8();
1263
1264             /**************************
1265              * CALCULATE INTERACTIONS *
1266              **************************/
1267
1268             /* REACTION-FIELD ELECTROSTATICS */
1269             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1270
1271             fscal            = felec;
1272
1273             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1274
1275             /* Update vectorial force */
1276             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1277             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1278             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1279             
1280             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1281             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1282             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1283
1284             /**************************
1285              * CALCULATE INTERACTIONS *
1286              **************************/
1287
1288             /* REACTION-FIELD ELECTROSTATICS */
1289             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1290
1291             fscal            = felec;
1292
1293             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1294
1295             /* Update vectorial force */
1296             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1297             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1298             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1299             
1300             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1301             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1302             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1303
1304             /**************************
1305              * CALCULATE INTERACTIONS *
1306              **************************/
1307
1308             /* REACTION-FIELD ELECTROSTATICS */
1309             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1310
1311             fscal            = felec;
1312
1313             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1314
1315             /* Update vectorial force */
1316             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1317             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1318             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1319             
1320             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1321             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1322             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1323
1324             /**************************
1325              * CALCULATE INTERACTIONS *
1326              **************************/
1327
1328             /* REACTION-FIELD ELECTROSTATICS */
1329             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1330
1331             fscal            = felec;
1332
1333             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1334
1335             /* Update vectorial force */
1336             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1337             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1338             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1339             
1340             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1341             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1342             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1343
1344             /**************************
1345              * CALCULATE INTERACTIONS *
1346              **************************/
1347
1348             /* REACTION-FIELD ELECTROSTATICS */
1349             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1350
1351             fscal            = felec;
1352
1353             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1354
1355             /* Update vectorial force */
1356             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1357             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1358             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1359             
1360             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1361             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1362             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1363
1364             /**************************
1365              * CALCULATE INTERACTIONS *
1366              **************************/
1367
1368             /* REACTION-FIELD ELECTROSTATICS */
1369             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1370
1371             fscal            = felec;
1372
1373             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1374
1375             /* Update vectorial force */
1376             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1377             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1378             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1379             
1380             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1381             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1382             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1383
1384             /**************************
1385              * CALCULATE INTERACTIONS *
1386              **************************/
1387
1388             /* REACTION-FIELD ELECTROSTATICS */
1389             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1390
1391             fscal            = felec;
1392
1393             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1394
1395             /* Update vectorial force */
1396             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1397             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1398             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1399             
1400             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1401             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1402             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1403
1404             /**************************
1405              * CALCULATE INTERACTIONS *
1406              **************************/
1407
1408             /* REACTION-FIELD ELECTROSTATICS */
1409             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1410
1411             fscal            = felec;
1412
1413             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1414
1415             /* Update vectorial force */
1416             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1417             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1418             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1419             
1420             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1421             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1422             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1423
1424             /**************************
1425              * CALCULATE INTERACTIONS *
1426              **************************/
1427
1428             /* REACTION-FIELD ELECTROSTATICS */
1429             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1430
1431             fscal            = felec;
1432
1433             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1434
1435             /* Update vectorial force */
1436             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1437             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1438             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1439             
1440             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1441             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1442             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1443
1444             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1445
1446             /* Inner loop uses 270 flops */
1447         }
1448
1449         /* End of innermost loop */
1450
1451         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1452                                               f+i_coord_offset,fshift+i_shift_offset);
1453
1454         /* Increment number of inner iterations */
1455         inneriter                  += j_index_end - j_index_start;
1456
1457         /* Outer loop uses 18 flops */
1458     }
1459
1460     /* Increment number of outer iterations */
1461     outeriter        += nri;
1462
1463     /* Update outer/inner flops */
1464
1465     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*270);
1466 }