910fa470ccfc791340959ade0546c06a4e83386b
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: ReactionField
54  * VdW interaction:            None
55  * Geometry:                   Water3-Water3
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
124     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
125     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
126
127     /* Setup water-specific parameters */
128     inr              = nlist->iinr[0];
129     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
130     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
131     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
132
133     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
134     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
135     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
136     qq00             = _fjsp_mul_v2r8(iq0,jq0);
137     qq01             = _fjsp_mul_v2r8(iq0,jq1);
138     qq02             = _fjsp_mul_v2r8(iq0,jq2);
139     qq10             = _fjsp_mul_v2r8(iq1,jq0);
140     qq11             = _fjsp_mul_v2r8(iq1,jq1);
141     qq12             = _fjsp_mul_v2r8(iq1,jq2);
142     qq20             = _fjsp_mul_v2r8(iq2,jq0);
143     qq21             = _fjsp_mul_v2r8(iq2,jq1);
144     qq22             = _fjsp_mul_v2r8(iq2,jq2);
145
146     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
147     rcutoff_scalar   = fr->rcoulomb;
148     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
149     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
150
151     /* Avoid stupid compiler warnings */
152     jnrA = jnrB = 0;
153     j_coord_offsetA = 0;
154     j_coord_offsetB = 0;
155
156     outeriter        = 0;
157     inneriter        = 0;
158
159     /* Start outer loop over neighborlists */
160     for(iidx=0; iidx<nri; iidx++)
161     {
162         /* Load shift vector for this list */
163         i_shift_offset   = DIM*shiftidx[iidx];
164
165         /* Load limits for loop over neighbors */
166         j_index_start    = jindex[iidx];
167         j_index_end      = jindex[iidx+1];
168
169         /* Get outer coordinate index */
170         inr              = iinr[iidx];
171         i_coord_offset   = DIM*inr;
172
173         /* Load i particle coords and add shift vector */
174         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
175                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
176
177         fix0             = _fjsp_setzero_v2r8();
178         fiy0             = _fjsp_setzero_v2r8();
179         fiz0             = _fjsp_setzero_v2r8();
180         fix1             = _fjsp_setzero_v2r8();
181         fiy1             = _fjsp_setzero_v2r8();
182         fiz1             = _fjsp_setzero_v2r8();
183         fix2             = _fjsp_setzero_v2r8();
184         fiy2             = _fjsp_setzero_v2r8();
185         fiz2             = _fjsp_setzero_v2r8();
186
187         /* Reset potential sums */
188         velecsum         = _fjsp_setzero_v2r8();
189
190         /* Start inner kernel loop */
191         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
192         {
193
194             /* Get j neighbor index, and coordinate index */
195             jnrA             = jjnr[jidx];
196             jnrB             = jjnr[jidx+1];
197             j_coord_offsetA  = DIM*jnrA;
198             j_coord_offsetB  = DIM*jnrB;
199
200             /* load j atom coordinates */
201             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
202                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
203
204             /* Calculate displacement vector */
205             dx00             = _fjsp_sub_v2r8(ix0,jx0);
206             dy00             = _fjsp_sub_v2r8(iy0,jy0);
207             dz00             = _fjsp_sub_v2r8(iz0,jz0);
208             dx01             = _fjsp_sub_v2r8(ix0,jx1);
209             dy01             = _fjsp_sub_v2r8(iy0,jy1);
210             dz01             = _fjsp_sub_v2r8(iz0,jz1);
211             dx02             = _fjsp_sub_v2r8(ix0,jx2);
212             dy02             = _fjsp_sub_v2r8(iy0,jy2);
213             dz02             = _fjsp_sub_v2r8(iz0,jz2);
214             dx10             = _fjsp_sub_v2r8(ix1,jx0);
215             dy10             = _fjsp_sub_v2r8(iy1,jy0);
216             dz10             = _fjsp_sub_v2r8(iz1,jz0);
217             dx11             = _fjsp_sub_v2r8(ix1,jx1);
218             dy11             = _fjsp_sub_v2r8(iy1,jy1);
219             dz11             = _fjsp_sub_v2r8(iz1,jz1);
220             dx12             = _fjsp_sub_v2r8(ix1,jx2);
221             dy12             = _fjsp_sub_v2r8(iy1,jy2);
222             dz12             = _fjsp_sub_v2r8(iz1,jz2);
223             dx20             = _fjsp_sub_v2r8(ix2,jx0);
224             dy20             = _fjsp_sub_v2r8(iy2,jy0);
225             dz20             = _fjsp_sub_v2r8(iz2,jz0);
226             dx21             = _fjsp_sub_v2r8(ix2,jx1);
227             dy21             = _fjsp_sub_v2r8(iy2,jy1);
228             dz21             = _fjsp_sub_v2r8(iz2,jz1);
229             dx22             = _fjsp_sub_v2r8(ix2,jx2);
230             dy22             = _fjsp_sub_v2r8(iy2,jy2);
231             dz22             = _fjsp_sub_v2r8(iz2,jz2);
232
233             /* Calculate squared distance and things based on it */
234             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
235             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
236             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
237             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
238             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
239             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
240             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
241             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
242             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
243
244             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
245             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
246             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
247             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
248             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
249             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
250             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
251             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
252             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
253
254             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
255             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
256             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
257             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
258             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
259             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
260             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
261             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
262             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
263
264             fjx0             = _fjsp_setzero_v2r8();
265             fjy0             = _fjsp_setzero_v2r8();
266             fjz0             = _fjsp_setzero_v2r8();
267             fjx1             = _fjsp_setzero_v2r8();
268             fjy1             = _fjsp_setzero_v2r8();
269             fjz1             = _fjsp_setzero_v2r8();
270             fjx2             = _fjsp_setzero_v2r8();
271             fjy2             = _fjsp_setzero_v2r8();
272             fjz2             = _fjsp_setzero_v2r8();
273
274             /**************************
275              * CALCULATE INTERACTIONS *
276              **************************/
277
278             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
279             {
280
281             /* REACTION-FIELD ELECTROSTATICS */
282             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
283             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
284
285             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
286
287             /* Update potential sum for this i atom from the interaction with this j atom. */
288             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
289             velecsum         = _fjsp_add_v2r8(velecsum,velec);
290
291             fscal            = felec;
292
293             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
294
295             /* Update vectorial force */
296             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
297             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
298             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
299             
300             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
301             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
302             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
303
304             }
305
306             /**************************
307              * CALCULATE INTERACTIONS *
308              **************************/
309
310             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
311             {
312
313             /* REACTION-FIELD ELECTROSTATICS */
314             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
315             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
316
317             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
318
319             /* Update potential sum for this i atom from the interaction with this j atom. */
320             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
321             velecsum         = _fjsp_add_v2r8(velecsum,velec);
322
323             fscal            = felec;
324
325             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
326
327             /* Update vectorial force */
328             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
329             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
330             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
331             
332             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
333             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
334             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
335
336             }
337
338             /**************************
339              * CALCULATE INTERACTIONS *
340              **************************/
341
342             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
343             {
344
345             /* REACTION-FIELD ELECTROSTATICS */
346             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
347             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
348
349             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
350
351             /* Update potential sum for this i atom from the interaction with this j atom. */
352             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
353             velecsum         = _fjsp_add_v2r8(velecsum,velec);
354
355             fscal            = felec;
356
357             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
358
359             /* Update vectorial force */
360             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
361             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
362             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
363             
364             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
365             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
366             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
367
368             }
369
370             /**************************
371              * CALCULATE INTERACTIONS *
372              **************************/
373
374             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
375             {
376
377             /* REACTION-FIELD ELECTROSTATICS */
378             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
379             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
380
381             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
382
383             /* Update potential sum for this i atom from the interaction with this j atom. */
384             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
385             velecsum         = _fjsp_add_v2r8(velecsum,velec);
386
387             fscal            = felec;
388
389             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
390
391             /* Update vectorial force */
392             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
393             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
394             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
395             
396             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
397             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
398             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
399
400             }
401
402             /**************************
403              * CALCULATE INTERACTIONS *
404              **************************/
405
406             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
407             {
408
409             /* REACTION-FIELD ELECTROSTATICS */
410             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
411             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
412
413             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
414
415             /* Update potential sum for this i atom from the interaction with this j atom. */
416             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
417             velecsum         = _fjsp_add_v2r8(velecsum,velec);
418
419             fscal            = felec;
420
421             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
422
423             /* Update vectorial force */
424             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
425             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
426             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
427             
428             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
429             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
430             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
431
432             }
433
434             /**************************
435              * CALCULATE INTERACTIONS *
436              **************************/
437
438             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
439             {
440
441             /* REACTION-FIELD ELECTROSTATICS */
442             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
443             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
444
445             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
446
447             /* Update potential sum for this i atom from the interaction with this j atom. */
448             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
449             velecsum         = _fjsp_add_v2r8(velecsum,velec);
450
451             fscal            = felec;
452
453             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
454
455             /* Update vectorial force */
456             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
457             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
458             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
459             
460             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
461             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
462             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
463
464             }
465
466             /**************************
467              * CALCULATE INTERACTIONS *
468              **************************/
469
470             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
471             {
472
473             /* REACTION-FIELD ELECTROSTATICS */
474             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
475             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
476
477             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
478
479             /* Update potential sum for this i atom from the interaction with this j atom. */
480             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
481             velecsum         = _fjsp_add_v2r8(velecsum,velec);
482
483             fscal            = felec;
484
485             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
486
487             /* Update vectorial force */
488             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
489             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
490             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
491             
492             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
493             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
494             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
495
496             }
497
498             /**************************
499              * CALCULATE INTERACTIONS *
500              **************************/
501
502             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
503             {
504
505             /* REACTION-FIELD ELECTROSTATICS */
506             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
507             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
508
509             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
510
511             /* Update potential sum for this i atom from the interaction with this j atom. */
512             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
513             velecsum         = _fjsp_add_v2r8(velecsum,velec);
514
515             fscal            = felec;
516
517             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
518
519             /* Update vectorial force */
520             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
521             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
522             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
523             
524             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
525             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
526             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
527
528             }
529
530             /**************************
531              * CALCULATE INTERACTIONS *
532              **************************/
533
534             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
535             {
536
537             /* REACTION-FIELD ELECTROSTATICS */
538             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
539             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
540
541             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
542
543             /* Update potential sum for this i atom from the interaction with this j atom. */
544             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
545             velecsum         = _fjsp_add_v2r8(velecsum,velec);
546
547             fscal            = felec;
548
549             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
550
551             /* Update vectorial force */
552             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
553             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
554             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
555             
556             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
557             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
558             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
559
560             }
561
562             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
563
564             /* Inner loop uses 351 flops */
565         }
566
567         if(jidx<j_index_end)
568         {
569
570             jnrA             = jjnr[jidx];
571             j_coord_offsetA  = DIM*jnrA;
572
573             /* load j atom coordinates */
574             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
575                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
576
577             /* Calculate displacement vector */
578             dx00             = _fjsp_sub_v2r8(ix0,jx0);
579             dy00             = _fjsp_sub_v2r8(iy0,jy0);
580             dz00             = _fjsp_sub_v2r8(iz0,jz0);
581             dx01             = _fjsp_sub_v2r8(ix0,jx1);
582             dy01             = _fjsp_sub_v2r8(iy0,jy1);
583             dz01             = _fjsp_sub_v2r8(iz0,jz1);
584             dx02             = _fjsp_sub_v2r8(ix0,jx2);
585             dy02             = _fjsp_sub_v2r8(iy0,jy2);
586             dz02             = _fjsp_sub_v2r8(iz0,jz2);
587             dx10             = _fjsp_sub_v2r8(ix1,jx0);
588             dy10             = _fjsp_sub_v2r8(iy1,jy0);
589             dz10             = _fjsp_sub_v2r8(iz1,jz0);
590             dx11             = _fjsp_sub_v2r8(ix1,jx1);
591             dy11             = _fjsp_sub_v2r8(iy1,jy1);
592             dz11             = _fjsp_sub_v2r8(iz1,jz1);
593             dx12             = _fjsp_sub_v2r8(ix1,jx2);
594             dy12             = _fjsp_sub_v2r8(iy1,jy2);
595             dz12             = _fjsp_sub_v2r8(iz1,jz2);
596             dx20             = _fjsp_sub_v2r8(ix2,jx0);
597             dy20             = _fjsp_sub_v2r8(iy2,jy0);
598             dz20             = _fjsp_sub_v2r8(iz2,jz0);
599             dx21             = _fjsp_sub_v2r8(ix2,jx1);
600             dy21             = _fjsp_sub_v2r8(iy2,jy1);
601             dz21             = _fjsp_sub_v2r8(iz2,jz1);
602             dx22             = _fjsp_sub_v2r8(ix2,jx2);
603             dy22             = _fjsp_sub_v2r8(iy2,jy2);
604             dz22             = _fjsp_sub_v2r8(iz2,jz2);
605
606             /* Calculate squared distance and things based on it */
607             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
608             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
609             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
610             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
611             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
612             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
613             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
614             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
615             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
616
617             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
618             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
619             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
620             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
621             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
622             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
623             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
624             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
625             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
626
627             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
628             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
629             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
630             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
631             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
632             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
633             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
634             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
635             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
636
637             fjx0             = _fjsp_setzero_v2r8();
638             fjy0             = _fjsp_setzero_v2r8();
639             fjz0             = _fjsp_setzero_v2r8();
640             fjx1             = _fjsp_setzero_v2r8();
641             fjy1             = _fjsp_setzero_v2r8();
642             fjz1             = _fjsp_setzero_v2r8();
643             fjx2             = _fjsp_setzero_v2r8();
644             fjy2             = _fjsp_setzero_v2r8();
645             fjz2             = _fjsp_setzero_v2r8();
646
647             /**************************
648              * CALCULATE INTERACTIONS *
649              **************************/
650
651             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
652             {
653
654             /* REACTION-FIELD ELECTROSTATICS */
655             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
656             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
657
658             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
659
660             /* Update potential sum for this i atom from the interaction with this j atom. */
661             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
662             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
663             velecsum         = _fjsp_add_v2r8(velecsum,velec);
664
665             fscal            = felec;
666
667             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
668
669             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
670
671             /* Update vectorial force */
672             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
673             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
674             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
675             
676             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
677             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
678             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
679
680             }
681
682             /**************************
683              * CALCULATE INTERACTIONS *
684              **************************/
685
686             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
687             {
688
689             /* REACTION-FIELD ELECTROSTATICS */
690             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
691             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
692
693             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
694
695             /* Update potential sum for this i atom from the interaction with this j atom. */
696             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
697             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
698             velecsum         = _fjsp_add_v2r8(velecsum,velec);
699
700             fscal            = felec;
701
702             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
703
704             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
705
706             /* Update vectorial force */
707             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
708             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
709             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
710             
711             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
712             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
713             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
714
715             }
716
717             /**************************
718              * CALCULATE INTERACTIONS *
719              **************************/
720
721             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
722             {
723
724             /* REACTION-FIELD ELECTROSTATICS */
725             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
726             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
727
728             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
729
730             /* Update potential sum for this i atom from the interaction with this j atom. */
731             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
732             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
733             velecsum         = _fjsp_add_v2r8(velecsum,velec);
734
735             fscal            = felec;
736
737             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
738
739             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
740
741             /* Update vectorial force */
742             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
743             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
744             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
745             
746             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
747             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
748             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
749
750             }
751
752             /**************************
753              * CALCULATE INTERACTIONS *
754              **************************/
755
756             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
757             {
758
759             /* REACTION-FIELD ELECTROSTATICS */
760             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
761             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
762
763             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
764
765             /* Update potential sum for this i atom from the interaction with this j atom. */
766             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
767             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
768             velecsum         = _fjsp_add_v2r8(velecsum,velec);
769
770             fscal            = felec;
771
772             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
773
774             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
775
776             /* Update vectorial force */
777             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
778             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
779             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
780             
781             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
782             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
783             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
784
785             }
786
787             /**************************
788              * CALCULATE INTERACTIONS *
789              **************************/
790
791             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
792             {
793
794             /* REACTION-FIELD ELECTROSTATICS */
795             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
796             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
797
798             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
799
800             /* Update potential sum for this i atom from the interaction with this j atom. */
801             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
802             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
803             velecsum         = _fjsp_add_v2r8(velecsum,velec);
804
805             fscal            = felec;
806
807             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
808
809             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
810
811             /* Update vectorial force */
812             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
813             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
814             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
815             
816             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
817             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
818             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
819
820             }
821
822             /**************************
823              * CALCULATE INTERACTIONS *
824              **************************/
825
826             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
827             {
828
829             /* REACTION-FIELD ELECTROSTATICS */
830             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
831             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
832
833             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
834
835             /* Update potential sum for this i atom from the interaction with this j atom. */
836             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
837             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
838             velecsum         = _fjsp_add_v2r8(velecsum,velec);
839
840             fscal            = felec;
841
842             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
843
844             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
845
846             /* Update vectorial force */
847             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
848             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
849             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
850             
851             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
852             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
853             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
854
855             }
856
857             /**************************
858              * CALCULATE INTERACTIONS *
859              **************************/
860
861             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
862             {
863
864             /* REACTION-FIELD ELECTROSTATICS */
865             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
866             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
867
868             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
869
870             /* Update potential sum for this i atom from the interaction with this j atom. */
871             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
872             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
873             velecsum         = _fjsp_add_v2r8(velecsum,velec);
874
875             fscal            = felec;
876
877             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
878
879             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
880
881             /* Update vectorial force */
882             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
883             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
884             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
885             
886             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
887             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
888             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
889
890             }
891
892             /**************************
893              * CALCULATE INTERACTIONS *
894              **************************/
895
896             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
897             {
898
899             /* REACTION-FIELD ELECTROSTATICS */
900             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
901             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
902
903             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
904
905             /* Update potential sum for this i atom from the interaction with this j atom. */
906             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
907             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
908             velecsum         = _fjsp_add_v2r8(velecsum,velec);
909
910             fscal            = felec;
911
912             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
913
914             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
915
916             /* Update vectorial force */
917             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
918             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
919             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
920             
921             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
922             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
923             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
924
925             }
926
927             /**************************
928              * CALCULATE INTERACTIONS *
929              **************************/
930
931             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
932             {
933
934             /* REACTION-FIELD ELECTROSTATICS */
935             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
936             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
937
938             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
939
940             /* Update potential sum for this i atom from the interaction with this j atom. */
941             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
942             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
943             velecsum         = _fjsp_add_v2r8(velecsum,velec);
944
945             fscal            = felec;
946
947             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
948
949             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
950
951             /* Update vectorial force */
952             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
953             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
954             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
955             
956             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
957             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
958             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
959
960             }
961
962             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
963
964             /* Inner loop uses 351 flops */
965         }
966
967         /* End of innermost loop */
968
969         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
970                                               f+i_coord_offset,fshift+i_shift_offset);
971
972         ggid                        = gid[iidx];
973         /* Update potential energies */
974         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
975
976         /* Increment number of inner iterations */
977         inneriter                  += j_index_end - j_index_start;
978
979         /* Outer loop uses 19 flops */
980     }
981
982     /* Increment number of outer iterations */
983     outeriter        += nri;
984
985     /* Update outer/inner flops */
986
987     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*351);
988 }
989 /*
990  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
991  * Electrostatics interaction: ReactionField
992  * VdW interaction:            None
993  * Geometry:                   Water3-Water3
994  * Calculate force/pot:        Force
995  */
996 void
997 nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
998                     (t_nblist                    * gmx_restrict       nlist,
999                      rvec                        * gmx_restrict          xx,
1000                      rvec                        * gmx_restrict          ff,
1001                      t_forcerec                  * gmx_restrict          fr,
1002                      t_mdatoms                   * gmx_restrict     mdatoms,
1003                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1004                      t_nrnb                      * gmx_restrict        nrnb)
1005 {
1006     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1007      * just 0 for non-waters.
1008      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1009      * jnr indices corresponding to data put in the two positions in the SIMD register.
1010      */
1011     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1012     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1013     int              jnrA,jnrB;
1014     int              j_coord_offsetA,j_coord_offsetB;
1015     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1016     real             rcutoff_scalar;
1017     real             *shiftvec,*fshift,*x,*f;
1018     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1019     int              vdwioffset0;
1020     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1021     int              vdwioffset1;
1022     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1023     int              vdwioffset2;
1024     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1025     int              vdwjidx0A,vdwjidx0B;
1026     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1027     int              vdwjidx1A,vdwjidx1B;
1028     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1029     int              vdwjidx2A,vdwjidx2B;
1030     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1031     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1032     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1033     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1034     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1035     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1036     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1037     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1038     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1039     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1040     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1041     real             *charge;
1042     _fjsp_v2r8       itab_tmp;
1043     _fjsp_v2r8       dummy_mask,cutoff_mask;
1044     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1045     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1046     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1047
1048     x                = xx[0];
1049     f                = ff[0];
1050
1051     nri              = nlist->nri;
1052     iinr             = nlist->iinr;
1053     jindex           = nlist->jindex;
1054     jjnr             = nlist->jjnr;
1055     shiftidx         = nlist->shift;
1056     gid              = nlist->gid;
1057     shiftvec         = fr->shift_vec[0];
1058     fshift           = fr->fshift[0];
1059     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1060     charge           = mdatoms->chargeA;
1061     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1062     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1063     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1064
1065     /* Setup water-specific parameters */
1066     inr              = nlist->iinr[0];
1067     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1068     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1069     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1070
1071     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1072     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1073     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1074     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1075     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1076     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1077     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1078     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1079     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1080     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1081     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1082     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1083
1084     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1085     rcutoff_scalar   = fr->rcoulomb;
1086     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1087     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1088
1089     /* Avoid stupid compiler warnings */
1090     jnrA = jnrB = 0;
1091     j_coord_offsetA = 0;
1092     j_coord_offsetB = 0;
1093
1094     outeriter        = 0;
1095     inneriter        = 0;
1096
1097     /* Start outer loop over neighborlists */
1098     for(iidx=0; iidx<nri; iidx++)
1099     {
1100         /* Load shift vector for this list */
1101         i_shift_offset   = DIM*shiftidx[iidx];
1102
1103         /* Load limits for loop over neighbors */
1104         j_index_start    = jindex[iidx];
1105         j_index_end      = jindex[iidx+1];
1106
1107         /* Get outer coordinate index */
1108         inr              = iinr[iidx];
1109         i_coord_offset   = DIM*inr;
1110
1111         /* Load i particle coords and add shift vector */
1112         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1113                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1114
1115         fix0             = _fjsp_setzero_v2r8();
1116         fiy0             = _fjsp_setzero_v2r8();
1117         fiz0             = _fjsp_setzero_v2r8();
1118         fix1             = _fjsp_setzero_v2r8();
1119         fiy1             = _fjsp_setzero_v2r8();
1120         fiz1             = _fjsp_setzero_v2r8();
1121         fix2             = _fjsp_setzero_v2r8();
1122         fiy2             = _fjsp_setzero_v2r8();
1123         fiz2             = _fjsp_setzero_v2r8();
1124
1125         /* Start inner kernel loop */
1126         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1127         {
1128
1129             /* Get j neighbor index, and coordinate index */
1130             jnrA             = jjnr[jidx];
1131             jnrB             = jjnr[jidx+1];
1132             j_coord_offsetA  = DIM*jnrA;
1133             j_coord_offsetB  = DIM*jnrB;
1134
1135             /* load j atom coordinates */
1136             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1137                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1138
1139             /* Calculate displacement vector */
1140             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1141             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1142             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1143             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1144             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1145             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1146             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1147             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1148             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1149             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1150             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1151             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1152             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1153             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1154             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1155             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1156             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1157             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1158             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1159             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1160             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1161             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1162             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1163             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1164             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1165             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1166             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1167
1168             /* Calculate squared distance and things based on it */
1169             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1170             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1171             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1172             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1173             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1174             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1175             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1176             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1177             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1178
1179             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1180             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1181             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1182             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1183             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1184             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1185             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1186             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1187             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1188
1189             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1190             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1191             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1192             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1193             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1194             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1195             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1196             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1197             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1198
1199             fjx0             = _fjsp_setzero_v2r8();
1200             fjy0             = _fjsp_setzero_v2r8();
1201             fjz0             = _fjsp_setzero_v2r8();
1202             fjx1             = _fjsp_setzero_v2r8();
1203             fjy1             = _fjsp_setzero_v2r8();
1204             fjz1             = _fjsp_setzero_v2r8();
1205             fjx2             = _fjsp_setzero_v2r8();
1206             fjy2             = _fjsp_setzero_v2r8();
1207             fjz2             = _fjsp_setzero_v2r8();
1208
1209             /**************************
1210              * CALCULATE INTERACTIONS *
1211              **************************/
1212
1213             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1214             {
1215
1216             /* REACTION-FIELD ELECTROSTATICS */
1217             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1218
1219             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1220
1221             fscal            = felec;
1222
1223             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1224
1225             /* Update vectorial force */
1226             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1227             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1228             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1229             
1230             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1231             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1232             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1233
1234             }
1235
1236             /**************************
1237              * CALCULATE INTERACTIONS *
1238              **************************/
1239
1240             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1241             {
1242
1243             /* REACTION-FIELD ELECTROSTATICS */
1244             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1245
1246             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1247
1248             fscal            = felec;
1249
1250             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1251
1252             /* Update vectorial force */
1253             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1254             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1255             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1256             
1257             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1258             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1259             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1260
1261             }
1262
1263             /**************************
1264              * CALCULATE INTERACTIONS *
1265              **************************/
1266
1267             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1268             {
1269
1270             /* REACTION-FIELD ELECTROSTATICS */
1271             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1272
1273             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1274
1275             fscal            = felec;
1276
1277             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1278
1279             /* Update vectorial force */
1280             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1281             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1282             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1283             
1284             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1285             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1286             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1287
1288             }
1289
1290             /**************************
1291              * CALCULATE INTERACTIONS *
1292              **************************/
1293
1294             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1295             {
1296
1297             /* REACTION-FIELD ELECTROSTATICS */
1298             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1299
1300             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1301
1302             fscal            = felec;
1303
1304             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1305
1306             /* Update vectorial force */
1307             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1308             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1309             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1310             
1311             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1312             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1313             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1314
1315             }
1316
1317             /**************************
1318              * CALCULATE INTERACTIONS *
1319              **************************/
1320
1321             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1322             {
1323
1324             /* REACTION-FIELD ELECTROSTATICS */
1325             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1326
1327             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1328
1329             fscal            = felec;
1330
1331             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1332
1333             /* Update vectorial force */
1334             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1335             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1336             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1337             
1338             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1339             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1340             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1341
1342             }
1343
1344             /**************************
1345              * CALCULATE INTERACTIONS *
1346              **************************/
1347
1348             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1349             {
1350
1351             /* REACTION-FIELD ELECTROSTATICS */
1352             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1353
1354             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1355
1356             fscal            = felec;
1357
1358             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1359
1360             /* Update vectorial force */
1361             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1362             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1363             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1364             
1365             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1366             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1367             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1368
1369             }
1370
1371             /**************************
1372              * CALCULATE INTERACTIONS *
1373              **************************/
1374
1375             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1376             {
1377
1378             /* REACTION-FIELD ELECTROSTATICS */
1379             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1380
1381             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1382
1383             fscal            = felec;
1384
1385             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1386
1387             /* Update vectorial force */
1388             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1389             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1390             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1391             
1392             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1393             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1394             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1395
1396             }
1397
1398             /**************************
1399              * CALCULATE INTERACTIONS *
1400              **************************/
1401
1402             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1403             {
1404
1405             /* REACTION-FIELD ELECTROSTATICS */
1406             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1407
1408             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1409
1410             fscal            = felec;
1411
1412             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1413
1414             /* Update vectorial force */
1415             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1416             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1417             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1418             
1419             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1420             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1421             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1422
1423             }
1424
1425             /**************************
1426              * CALCULATE INTERACTIONS *
1427              **************************/
1428
1429             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1430             {
1431
1432             /* REACTION-FIELD ELECTROSTATICS */
1433             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1434
1435             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1436
1437             fscal            = felec;
1438
1439             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1440
1441             /* Update vectorial force */
1442             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1443             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1444             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1445             
1446             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1447             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1448             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1449
1450             }
1451
1452             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1453
1454             /* Inner loop uses 297 flops */
1455         }
1456
1457         if(jidx<j_index_end)
1458         {
1459
1460             jnrA             = jjnr[jidx];
1461             j_coord_offsetA  = DIM*jnrA;
1462
1463             /* load j atom coordinates */
1464             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1465                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1466
1467             /* Calculate displacement vector */
1468             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1469             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1470             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1471             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1472             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1473             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1474             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1475             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1476             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1477             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1478             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1479             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1480             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1481             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1482             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1483             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1484             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1485             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1486             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1487             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1488             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1489             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1490             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1491             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1492             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1493             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1494             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1495
1496             /* Calculate squared distance and things based on it */
1497             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1498             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1499             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1500             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1501             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1502             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1503             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1504             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1505             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1506
1507             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1508             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1509             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1510             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1511             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1512             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1513             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1514             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1515             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1516
1517             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1518             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1519             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1520             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1521             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1522             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1523             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1524             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1525             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1526
1527             fjx0             = _fjsp_setzero_v2r8();
1528             fjy0             = _fjsp_setzero_v2r8();
1529             fjz0             = _fjsp_setzero_v2r8();
1530             fjx1             = _fjsp_setzero_v2r8();
1531             fjy1             = _fjsp_setzero_v2r8();
1532             fjz1             = _fjsp_setzero_v2r8();
1533             fjx2             = _fjsp_setzero_v2r8();
1534             fjy2             = _fjsp_setzero_v2r8();
1535             fjz2             = _fjsp_setzero_v2r8();
1536
1537             /**************************
1538              * CALCULATE INTERACTIONS *
1539              **************************/
1540
1541             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1542             {
1543
1544             /* REACTION-FIELD ELECTROSTATICS */
1545             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1546
1547             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1548
1549             fscal            = felec;
1550
1551             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1552
1553             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1554
1555             /* Update vectorial force */
1556             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1557             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1558             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1559             
1560             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1561             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1562             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1563
1564             }
1565
1566             /**************************
1567              * CALCULATE INTERACTIONS *
1568              **************************/
1569
1570             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1571             {
1572
1573             /* REACTION-FIELD ELECTROSTATICS */
1574             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1575
1576             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1577
1578             fscal            = felec;
1579
1580             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1581
1582             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1583
1584             /* Update vectorial force */
1585             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1586             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1587             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1588             
1589             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1590             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1591             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1592
1593             }
1594
1595             /**************************
1596              * CALCULATE INTERACTIONS *
1597              **************************/
1598
1599             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1600             {
1601
1602             /* REACTION-FIELD ELECTROSTATICS */
1603             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1604
1605             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1606
1607             fscal            = felec;
1608
1609             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1610
1611             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1612
1613             /* Update vectorial force */
1614             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1615             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1616             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1617             
1618             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1619             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1620             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1621
1622             }
1623
1624             /**************************
1625              * CALCULATE INTERACTIONS *
1626              **************************/
1627
1628             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1629             {
1630
1631             /* REACTION-FIELD ELECTROSTATICS */
1632             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1633
1634             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1635
1636             fscal            = felec;
1637
1638             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1639
1640             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1641
1642             /* Update vectorial force */
1643             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1644             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1645             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1646             
1647             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1648             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1649             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1650
1651             }
1652
1653             /**************************
1654              * CALCULATE INTERACTIONS *
1655              **************************/
1656
1657             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1658             {
1659
1660             /* REACTION-FIELD ELECTROSTATICS */
1661             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1662
1663             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1664
1665             fscal            = felec;
1666
1667             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1668
1669             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1670
1671             /* Update vectorial force */
1672             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1673             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1674             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1675             
1676             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1677             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1678             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1679
1680             }
1681
1682             /**************************
1683              * CALCULATE INTERACTIONS *
1684              **************************/
1685
1686             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1687             {
1688
1689             /* REACTION-FIELD ELECTROSTATICS */
1690             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1691
1692             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1693
1694             fscal            = felec;
1695
1696             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1697
1698             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1699
1700             /* Update vectorial force */
1701             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1702             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1703             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1704             
1705             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1706             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1707             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1708
1709             }
1710
1711             /**************************
1712              * CALCULATE INTERACTIONS *
1713              **************************/
1714
1715             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1716             {
1717
1718             /* REACTION-FIELD ELECTROSTATICS */
1719             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1720
1721             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1722
1723             fscal            = felec;
1724
1725             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1726
1727             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1728
1729             /* Update vectorial force */
1730             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1731             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1732             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1733             
1734             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1735             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1736             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1737
1738             }
1739
1740             /**************************
1741              * CALCULATE INTERACTIONS *
1742              **************************/
1743
1744             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1745             {
1746
1747             /* REACTION-FIELD ELECTROSTATICS */
1748             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1749
1750             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1751
1752             fscal            = felec;
1753
1754             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1755
1756             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1757
1758             /* Update vectorial force */
1759             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1760             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1761             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1762             
1763             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1764             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1765             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1766
1767             }
1768
1769             /**************************
1770              * CALCULATE INTERACTIONS *
1771              **************************/
1772
1773             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1774             {
1775
1776             /* REACTION-FIELD ELECTROSTATICS */
1777             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1778
1779             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1780
1781             fscal            = felec;
1782
1783             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1784
1785             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1786
1787             /* Update vectorial force */
1788             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1789             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1790             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1791             
1792             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1793             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1794             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1795
1796             }
1797
1798             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1799
1800             /* Inner loop uses 297 flops */
1801         }
1802
1803         /* End of innermost loop */
1804
1805         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1806                                               f+i_coord_offset,fshift+i_shift_offset);
1807
1808         /* Increment number of inner iterations */
1809         inneriter                  += j_index_end - j_index_start;
1810
1811         /* Outer loop uses 18 flops */
1812     }
1813
1814     /* Increment number of outer iterations */
1815     outeriter        += nri;
1816
1817     /* Update outer/inner flops */
1818
1819     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*297);
1820 }