ee249c974d01f425e6f1ea88e944840bfde172b7
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_kernel_avx_128_fma_double / nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_128_fma_double.c
1 /*
2  * Note: this file was generated by the Gromacs avx_128_fma_double kernel generator.
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  * Copyright (c) 2001-2012, The GROMACS Development Team
9  *
10  * Gromacs is a library for molecular simulation and trajectory analysis,
11  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
12  * a full list of developers and information, check out http://www.gromacs.org
13  *
14  * This program is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 2 of the License, or (at your option) any
17  * later version.
18  *
19  * To help fund GROMACS development, we humbly ask that you cite
20  * the papers people have written on it - you can find them on the website.
21  */
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <math.h>
27
28 #include "../nb_kernel.h"
29 #include "types/simple.h"
30 #include "vec.h"
31 #include "nrnb.h"
32
33 #include "gmx_math_x86_avx_128_fma_double.h"
34 #include "kernelutil_x86_avx_128_fma_double.h"
35
36 /*
37  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_double
38  * Electrostatics interaction: ReactionField
39  * VdW interaction:            LennardJones
40  * Geometry:                   Water4-Water4
41  * Calculate force/pot:        PotentialAndForce
42  */
43 void
44 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_128_fma_double
45                     (t_nblist * gmx_restrict                nlist,
46                      rvec * gmx_restrict                    xx,
47                      rvec * gmx_restrict                    ff,
48                      t_forcerec * gmx_restrict              fr,
49                      t_mdatoms * gmx_restrict               mdatoms,
50                      nb_kernel_data_t * gmx_restrict        kernel_data,
51                      t_nrnb * gmx_restrict                  nrnb)
52 {
53     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
54      * just 0 for non-waters.
55      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
56      * jnr indices corresponding to data put in the four positions in the SIMD register.
57      */
58     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
59     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
60     int              jnrA,jnrB;
61     int              j_coord_offsetA,j_coord_offsetB;
62     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
63     real             rcutoff_scalar;
64     real             *shiftvec,*fshift,*x,*f;
65     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
66     int              vdwioffset0;
67     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
68     int              vdwioffset1;
69     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
70     int              vdwioffset2;
71     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
72     int              vdwioffset3;
73     __m128d          ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
74     int              vdwjidx0A,vdwjidx0B;
75     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
76     int              vdwjidx1A,vdwjidx1B;
77     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
78     int              vdwjidx2A,vdwjidx2B;
79     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
80     int              vdwjidx3A,vdwjidx3B;
81     __m128d          jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
82     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
83     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
84     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
85     __m128d          dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
86     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
87     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
88     __m128d          dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
89     __m128d          dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
90     __m128d          dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
91     __m128d          dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
92     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
93     real             *charge;
94     int              nvdwtype;
95     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
96     int              *vdwtype;
97     real             *vdwparam;
98     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
99     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
100     __m128d          dummy_mask,cutoff_mask;
101     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
102     __m128d          one     = _mm_set1_pd(1.0);
103     __m128d          two     = _mm_set1_pd(2.0);
104     x                = xx[0];
105     f                = ff[0];
106
107     nri              = nlist->nri;
108     iinr             = nlist->iinr;
109     jindex           = nlist->jindex;
110     jjnr             = nlist->jjnr;
111     shiftidx         = nlist->shift;
112     gid              = nlist->gid;
113     shiftvec         = fr->shift_vec[0];
114     fshift           = fr->fshift[0];
115     facel            = _mm_set1_pd(fr->epsfac);
116     charge           = mdatoms->chargeA;
117     krf              = _mm_set1_pd(fr->ic->k_rf);
118     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
119     crf              = _mm_set1_pd(fr->ic->c_rf);
120     nvdwtype         = fr->ntype;
121     vdwparam         = fr->nbfp;
122     vdwtype          = mdatoms->typeA;
123
124     /* Setup water-specific parameters */
125     inr              = nlist->iinr[0];
126     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
127     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
128     iq3              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+3]));
129     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
130
131     jq1              = _mm_set1_pd(charge[inr+1]);
132     jq2              = _mm_set1_pd(charge[inr+2]);
133     jq3              = _mm_set1_pd(charge[inr+3]);
134     vdwjidx0A        = 2*vdwtype[inr+0];
135     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
136     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
137     qq11             = _mm_mul_pd(iq1,jq1);
138     qq12             = _mm_mul_pd(iq1,jq2);
139     qq13             = _mm_mul_pd(iq1,jq3);
140     qq21             = _mm_mul_pd(iq2,jq1);
141     qq22             = _mm_mul_pd(iq2,jq2);
142     qq23             = _mm_mul_pd(iq2,jq3);
143     qq31             = _mm_mul_pd(iq3,jq1);
144     qq32             = _mm_mul_pd(iq3,jq2);
145     qq33             = _mm_mul_pd(iq3,jq3);
146
147     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
148     rcutoff_scalar   = fr->rcoulomb;
149     rcutoff          = _mm_set1_pd(rcutoff_scalar);
150     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
151
152     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
153     rvdw             = _mm_set1_pd(fr->rvdw);
154
155     /* Avoid stupid compiler warnings */
156     jnrA = jnrB = 0;
157     j_coord_offsetA = 0;
158     j_coord_offsetB = 0;
159
160     outeriter        = 0;
161     inneriter        = 0;
162
163     /* Start outer loop over neighborlists */
164     for(iidx=0; iidx<nri; iidx++)
165     {
166         /* Load shift vector for this list */
167         i_shift_offset   = DIM*shiftidx[iidx];
168
169         /* Load limits for loop over neighbors */
170         j_index_start    = jindex[iidx];
171         j_index_end      = jindex[iidx+1];
172
173         /* Get outer coordinate index */
174         inr              = iinr[iidx];
175         i_coord_offset   = DIM*inr;
176
177         /* Load i particle coords and add shift vector */
178         gmx_mm_load_shift_and_4rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
179                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
180
181         fix0             = _mm_setzero_pd();
182         fiy0             = _mm_setzero_pd();
183         fiz0             = _mm_setzero_pd();
184         fix1             = _mm_setzero_pd();
185         fiy1             = _mm_setzero_pd();
186         fiz1             = _mm_setzero_pd();
187         fix2             = _mm_setzero_pd();
188         fiy2             = _mm_setzero_pd();
189         fiz2             = _mm_setzero_pd();
190         fix3             = _mm_setzero_pd();
191         fiy3             = _mm_setzero_pd();
192         fiz3             = _mm_setzero_pd();
193
194         /* Reset potential sums */
195         velecsum         = _mm_setzero_pd();
196         vvdwsum          = _mm_setzero_pd();
197
198         /* Start inner kernel loop */
199         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
200         {
201
202             /* Get j neighbor index, and coordinate index */
203             jnrA             = jjnr[jidx];
204             jnrB             = jjnr[jidx+1];
205             j_coord_offsetA  = DIM*jnrA;
206             j_coord_offsetB  = DIM*jnrB;
207
208             /* load j atom coordinates */
209             gmx_mm_load_4rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
210                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
211                                               &jy2,&jz2,&jx3,&jy3,&jz3);
212
213             /* Calculate displacement vector */
214             dx00             = _mm_sub_pd(ix0,jx0);
215             dy00             = _mm_sub_pd(iy0,jy0);
216             dz00             = _mm_sub_pd(iz0,jz0);
217             dx11             = _mm_sub_pd(ix1,jx1);
218             dy11             = _mm_sub_pd(iy1,jy1);
219             dz11             = _mm_sub_pd(iz1,jz1);
220             dx12             = _mm_sub_pd(ix1,jx2);
221             dy12             = _mm_sub_pd(iy1,jy2);
222             dz12             = _mm_sub_pd(iz1,jz2);
223             dx13             = _mm_sub_pd(ix1,jx3);
224             dy13             = _mm_sub_pd(iy1,jy3);
225             dz13             = _mm_sub_pd(iz1,jz3);
226             dx21             = _mm_sub_pd(ix2,jx1);
227             dy21             = _mm_sub_pd(iy2,jy1);
228             dz21             = _mm_sub_pd(iz2,jz1);
229             dx22             = _mm_sub_pd(ix2,jx2);
230             dy22             = _mm_sub_pd(iy2,jy2);
231             dz22             = _mm_sub_pd(iz2,jz2);
232             dx23             = _mm_sub_pd(ix2,jx3);
233             dy23             = _mm_sub_pd(iy2,jy3);
234             dz23             = _mm_sub_pd(iz2,jz3);
235             dx31             = _mm_sub_pd(ix3,jx1);
236             dy31             = _mm_sub_pd(iy3,jy1);
237             dz31             = _mm_sub_pd(iz3,jz1);
238             dx32             = _mm_sub_pd(ix3,jx2);
239             dy32             = _mm_sub_pd(iy3,jy2);
240             dz32             = _mm_sub_pd(iz3,jz2);
241             dx33             = _mm_sub_pd(ix3,jx3);
242             dy33             = _mm_sub_pd(iy3,jy3);
243             dz33             = _mm_sub_pd(iz3,jz3);
244
245             /* Calculate squared distance and things based on it */
246             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
247             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
248             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
249             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
250             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
251             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
252             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
253             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
254             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
255             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
256
257             rinv11           = gmx_mm_invsqrt_pd(rsq11);
258             rinv12           = gmx_mm_invsqrt_pd(rsq12);
259             rinv13           = gmx_mm_invsqrt_pd(rsq13);
260             rinv21           = gmx_mm_invsqrt_pd(rsq21);
261             rinv22           = gmx_mm_invsqrt_pd(rsq22);
262             rinv23           = gmx_mm_invsqrt_pd(rsq23);
263             rinv31           = gmx_mm_invsqrt_pd(rsq31);
264             rinv32           = gmx_mm_invsqrt_pd(rsq32);
265             rinv33           = gmx_mm_invsqrt_pd(rsq33);
266
267             rinvsq00         = gmx_mm_inv_pd(rsq00);
268             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
269             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
270             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
271             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
272             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
273             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
274             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
275             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
276             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
277
278             fjx0             = _mm_setzero_pd();
279             fjy0             = _mm_setzero_pd();
280             fjz0             = _mm_setzero_pd();
281             fjx1             = _mm_setzero_pd();
282             fjy1             = _mm_setzero_pd();
283             fjz1             = _mm_setzero_pd();
284             fjx2             = _mm_setzero_pd();
285             fjy2             = _mm_setzero_pd();
286             fjz2             = _mm_setzero_pd();
287             fjx3             = _mm_setzero_pd();
288             fjy3             = _mm_setzero_pd();
289             fjz3             = _mm_setzero_pd();
290
291             /**************************
292              * CALCULATE INTERACTIONS *
293              **************************/
294
295             if (gmx_mm_any_lt(rsq00,rcutoff2))
296             {
297
298             /* LENNARD-JONES DISPERSION/REPULSION */
299
300             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
301             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
302             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
303             vvdw             = _mm_msub_pd(_mm_nmacc_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
304                                            _mm_mul_pd(_mm_nmacc_pd( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
305             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
306
307             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
308
309             /* Update potential sum for this i atom from the interaction with this j atom. */
310             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
311             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
312
313             fscal            = fvdw;
314
315             fscal            = _mm_and_pd(fscal,cutoff_mask);
316
317             /* Update vectorial force */
318             fix0             = _mm_macc_pd(dx00,fscal,fix0);
319             fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
320             fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
321             
322             fjx0             = _mm_macc_pd(dx00,fscal,fjx0);
323             fjy0             = _mm_macc_pd(dy00,fscal,fjy0);
324             fjz0             = _mm_macc_pd(dz00,fscal,fjz0);
325
326             }
327
328             /**************************
329              * CALCULATE INTERACTIONS *
330              **************************/
331
332             if (gmx_mm_any_lt(rsq11,rcutoff2))
333             {
334
335             /* REACTION-FIELD ELECTROSTATICS */
336             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_macc_pd(krf,rsq11,rinv11),crf));
337             felec            = _mm_mul_pd(qq11,_mm_msub_pd(rinv11,rinvsq11,krf2));
338
339             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
340
341             /* Update potential sum for this i atom from the interaction with this j atom. */
342             velec            = _mm_and_pd(velec,cutoff_mask);
343             velecsum         = _mm_add_pd(velecsum,velec);
344
345             fscal            = felec;
346
347             fscal            = _mm_and_pd(fscal,cutoff_mask);
348
349             /* Update vectorial force */
350             fix1             = _mm_macc_pd(dx11,fscal,fix1);
351             fiy1             = _mm_macc_pd(dy11,fscal,fiy1);
352             fiz1             = _mm_macc_pd(dz11,fscal,fiz1);
353             
354             fjx1             = _mm_macc_pd(dx11,fscal,fjx1);
355             fjy1             = _mm_macc_pd(dy11,fscal,fjy1);
356             fjz1             = _mm_macc_pd(dz11,fscal,fjz1);
357
358             }
359
360             /**************************
361              * CALCULATE INTERACTIONS *
362              **************************/
363
364             if (gmx_mm_any_lt(rsq12,rcutoff2))
365             {
366
367             /* REACTION-FIELD ELECTROSTATICS */
368             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_macc_pd(krf,rsq12,rinv12),crf));
369             felec            = _mm_mul_pd(qq12,_mm_msub_pd(rinv12,rinvsq12,krf2));
370
371             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
372
373             /* Update potential sum for this i atom from the interaction with this j atom. */
374             velec            = _mm_and_pd(velec,cutoff_mask);
375             velecsum         = _mm_add_pd(velecsum,velec);
376
377             fscal            = felec;
378
379             fscal            = _mm_and_pd(fscal,cutoff_mask);
380
381             /* Update vectorial force */
382             fix1             = _mm_macc_pd(dx12,fscal,fix1);
383             fiy1             = _mm_macc_pd(dy12,fscal,fiy1);
384             fiz1             = _mm_macc_pd(dz12,fscal,fiz1);
385             
386             fjx2             = _mm_macc_pd(dx12,fscal,fjx2);
387             fjy2             = _mm_macc_pd(dy12,fscal,fjy2);
388             fjz2             = _mm_macc_pd(dz12,fscal,fjz2);
389
390             }
391
392             /**************************
393              * CALCULATE INTERACTIONS *
394              **************************/
395
396             if (gmx_mm_any_lt(rsq13,rcutoff2))
397             {
398
399             /* REACTION-FIELD ELECTROSTATICS */
400             velec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_macc_pd(krf,rsq13,rinv13),crf));
401             felec            = _mm_mul_pd(qq13,_mm_msub_pd(rinv13,rinvsq13,krf2));
402
403             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
404
405             /* Update potential sum for this i atom from the interaction with this j atom. */
406             velec            = _mm_and_pd(velec,cutoff_mask);
407             velecsum         = _mm_add_pd(velecsum,velec);
408
409             fscal            = felec;
410
411             fscal            = _mm_and_pd(fscal,cutoff_mask);
412
413             /* Update vectorial force */
414             fix1             = _mm_macc_pd(dx13,fscal,fix1);
415             fiy1             = _mm_macc_pd(dy13,fscal,fiy1);
416             fiz1             = _mm_macc_pd(dz13,fscal,fiz1);
417             
418             fjx3             = _mm_macc_pd(dx13,fscal,fjx3);
419             fjy3             = _mm_macc_pd(dy13,fscal,fjy3);
420             fjz3             = _mm_macc_pd(dz13,fscal,fjz3);
421
422             }
423
424             /**************************
425              * CALCULATE INTERACTIONS *
426              **************************/
427
428             if (gmx_mm_any_lt(rsq21,rcutoff2))
429             {
430
431             /* REACTION-FIELD ELECTROSTATICS */
432             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_macc_pd(krf,rsq21,rinv21),crf));
433             felec            = _mm_mul_pd(qq21,_mm_msub_pd(rinv21,rinvsq21,krf2));
434
435             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
436
437             /* Update potential sum for this i atom from the interaction with this j atom. */
438             velec            = _mm_and_pd(velec,cutoff_mask);
439             velecsum         = _mm_add_pd(velecsum,velec);
440
441             fscal            = felec;
442
443             fscal            = _mm_and_pd(fscal,cutoff_mask);
444
445             /* Update vectorial force */
446             fix2             = _mm_macc_pd(dx21,fscal,fix2);
447             fiy2             = _mm_macc_pd(dy21,fscal,fiy2);
448             fiz2             = _mm_macc_pd(dz21,fscal,fiz2);
449             
450             fjx1             = _mm_macc_pd(dx21,fscal,fjx1);
451             fjy1             = _mm_macc_pd(dy21,fscal,fjy1);
452             fjz1             = _mm_macc_pd(dz21,fscal,fjz1);
453
454             }
455
456             /**************************
457              * CALCULATE INTERACTIONS *
458              **************************/
459
460             if (gmx_mm_any_lt(rsq22,rcutoff2))
461             {
462
463             /* REACTION-FIELD ELECTROSTATICS */
464             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_macc_pd(krf,rsq22,rinv22),crf));
465             felec            = _mm_mul_pd(qq22,_mm_msub_pd(rinv22,rinvsq22,krf2));
466
467             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
468
469             /* Update potential sum for this i atom from the interaction with this j atom. */
470             velec            = _mm_and_pd(velec,cutoff_mask);
471             velecsum         = _mm_add_pd(velecsum,velec);
472
473             fscal            = felec;
474
475             fscal            = _mm_and_pd(fscal,cutoff_mask);
476
477             /* Update vectorial force */
478             fix2             = _mm_macc_pd(dx22,fscal,fix2);
479             fiy2             = _mm_macc_pd(dy22,fscal,fiy2);
480             fiz2             = _mm_macc_pd(dz22,fscal,fiz2);
481             
482             fjx2             = _mm_macc_pd(dx22,fscal,fjx2);
483             fjy2             = _mm_macc_pd(dy22,fscal,fjy2);
484             fjz2             = _mm_macc_pd(dz22,fscal,fjz2);
485
486             }
487
488             /**************************
489              * CALCULATE INTERACTIONS *
490              **************************/
491
492             if (gmx_mm_any_lt(rsq23,rcutoff2))
493             {
494
495             /* REACTION-FIELD ELECTROSTATICS */
496             velec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_macc_pd(krf,rsq23,rinv23),crf));
497             felec            = _mm_mul_pd(qq23,_mm_msub_pd(rinv23,rinvsq23,krf2));
498
499             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
500
501             /* Update potential sum for this i atom from the interaction with this j atom. */
502             velec            = _mm_and_pd(velec,cutoff_mask);
503             velecsum         = _mm_add_pd(velecsum,velec);
504
505             fscal            = felec;
506
507             fscal            = _mm_and_pd(fscal,cutoff_mask);
508
509             /* Update vectorial force */
510             fix2             = _mm_macc_pd(dx23,fscal,fix2);
511             fiy2             = _mm_macc_pd(dy23,fscal,fiy2);
512             fiz2             = _mm_macc_pd(dz23,fscal,fiz2);
513             
514             fjx3             = _mm_macc_pd(dx23,fscal,fjx3);
515             fjy3             = _mm_macc_pd(dy23,fscal,fjy3);
516             fjz3             = _mm_macc_pd(dz23,fscal,fjz3);
517
518             }
519
520             /**************************
521              * CALCULATE INTERACTIONS *
522              **************************/
523
524             if (gmx_mm_any_lt(rsq31,rcutoff2))
525             {
526
527             /* REACTION-FIELD ELECTROSTATICS */
528             velec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_macc_pd(krf,rsq31,rinv31),crf));
529             felec            = _mm_mul_pd(qq31,_mm_msub_pd(rinv31,rinvsq31,krf2));
530
531             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
532
533             /* Update potential sum for this i atom from the interaction with this j atom. */
534             velec            = _mm_and_pd(velec,cutoff_mask);
535             velecsum         = _mm_add_pd(velecsum,velec);
536
537             fscal            = felec;
538
539             fscal            = _mm_and_pd(fscal,cutoff_mask);
540
541             /* Update vectorial force */
542             fix3             = _mm_macc_pd(dx31,fscal,fix3);
543             fiy3             = _mm_macc_pd(dy31,fscal,fiy3);
544             fiz3             = _mm_macc_pd(dz31,fscal,fiz3);
545             
546             fjx1             = _mm_macc_pd(dx31,fscal,fjx1);
547             fjy1             = _mm_macc_pd(dy31,fscal,fjy1);
548             fjz1             = _mm_macc_pd(dz31,fscal,fjz1);
549
550             }
551
552             /**************************
553              * CALCULATE INTERACTIONS *
554              **************************/
555
556             if (gmx_mm_any_lt(rsq32,rcutoff2))
557             {
558
559             /* REACTION-FIELD ELECTROSTATICS */
560             velec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_macc_pd(krf,rsq32,rinv32),crf));
561             felec            = _mm_mul_pd(qq32,_mm_msub_pd(rinv32,rinvsq32,krf2));
562
563             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
564
565             /* Update potential sum for this i atom from the interaction with this j atom. */
566             velec            = _mm_and_pd(velec,cutoff_mask);
567             velecsum         = _mm_add_pd(velecsum,velec);
568
569             fscal            = felec;
570
571             fscal            = _mm_and_pd(fscal,cutoff_mask);
572
573             /* Update vectorial force */
574             fix3             = _mm_macc_pd(dx32,fscal,fix3);
575             fiy3             = _mm_macc_pd(dy32,fscal,fiy3);
576             fiz3             = _mm_macc_pd(dz32,fscal,fiz3);
577             
578             fjx2             = _mm_macc_pd(dx32,fscal,fjx2);
579             fjy2             = _mm_macc_pd(dy32,fscal,fjy2);
580             fjz2             = _mm_macc_pd(dz32,fscal,fjz2);
581
582             }
583
584             /**************************
585              * CALCULATE INTERACTIONS *
586              **************************/
587
588             if (gmx_mm_any_lt(rsq33,rcutoff2))
589             {
590
591             /* REACTION-FIELD ELECTROSTATICS */
592             velec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_macc_pd(krf,rsq33,rinv33),crf));
593             felec            = _mm_mul_pd(qq33,_mm_msub_pd(rinv33,rinvsq33,krf2));
594
595             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
596
597             /* Update potential sum for this i atom from the interaction with this j atom. */
598             velec            = _mm_and_pd(velec,cutoff_mask);
599             velecsum         = _mm_add_pd(velecsum,velec);
600
601             fscal            = felec;
602
603             fscal            = _mm_and_pd(fscal,cutoff_mask);
604
605             /* Update vectorial force */
606             fix3             = _mm_macc_pd(dx33,fscal,fix3);
607             fiy3             = _mm_macc_pd(dy33,fscal,fiy3);
608             fiz3             = _mm_macc_pd(dz33,fscal,fiz3);
609             
610             fjx3             = _mm_macc_pd(dx33,fscal,fjx3);
611             fjy3             = _mm_macc_pd(dy33,fscal,fjy3);
612             fjz3             = _mm_macc_pd(dz33,fscal,fjz3);
613
614             }
615
616             gmx_mm_decrement_4rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
617
618             /* Inner loop uses 398 flops */
619         }
620
621         if(jidx<j_index_end)
622         {
623
624             jnrA             = jjnr[jidx];
625             j_coord_offsetA  = DIM*jnrA;
626
627             /* load j atom coordinates */
628             gmx_mm_load_4rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
629                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
630                                               &jy2,&jz2,&jx3,&jy3,&jz3);
631
632             /* Calculate displacement vector */
633             dx00             = _mm_sub_pd(ix0,jx0);
634             dy00             = _mm_sub_pd(iy0,jy0);
635             dz00             = _mm_sub_pd(iz0,jz0);
636             dx11             = _mm_sub_pd(ix1,jx1);
637             dy11             = _mm_sub_pd(iy1,jy1);
638             dz11             = _mm_sub_pd(iz1,jz1);
639             dx12             = _mm_sub_pd(ix1,jx2);
640             dy12             = _mm_sub_pd(iy1,jy2);
641             dz12             = _mm_sub_pd(iz1,jz2);
642             dx13             = _mm_sub_pd(ix1,jx3);
643             dy13             = _mm_sub_pd(iy1,jy3);
644             dz13             = _mm_sub_pd(iz1,jz3);
645             dx21             = _mm_sub_pd(ix2,jx1);
646             dy21             = _mm_sub_pd(iy2,jy1);
647             dz21             = _mm_sub_pd(iz2,jz1);
648             dx22             = _mm_sub_pd(ix2,jx2);
649             dy22             = _mm_sub_pd(iy2,jy2);
650             dz22             = _mm_sub_pd(iz2,jz2);
651             dx23             = _mm_sub_pd(ix2,jx3);
652             dy23             = _mm_sub_pd(iy2,jy3);
653             dz23             = _mm_sub_pd(iz2,jz3);
654             dx31             = _mm_sub_pd(ix3,jx1);
655             dy31             = _mm_sub_pd(iy3,jy1);
656             dz31             = _mm_sub_pd(iz3,jz1);
657             dx32             = _mm_sub_pd(ix3,jx2);
658             dy32             = _mm_sub_pd(iy3,jy2);
659             dz32             = _mm_sub_pd(iz3,jz2);
660             dx33             = _mm_sub_pd(ix3,jx3);
661             dy33             = _mm_sub_pd(iy3,jy3);
662             dz33             = _mm_sub_pd(iz3,jz3);
663
664             /* Calculate squared distance and things based on it */
665             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
666             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
667             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
668             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
669             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
670             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
671             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
672             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
673             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
674             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
675
676             rinv11           = gmx_mm_invsqrt_pd(rsq11);
677             rinv12           = gmx_mm_invsqrt_pd(rsq12);
678             rinv13           = gmx_mm_invsqrt_pd(rsq13);
679             rinv21           = gmx_mm_invsqrt_pd(rsq21);
680             rinv22           = gmx_mm_invsqrt_pd(rsq22);
681             rinv23           = gmx_mm_invsqrt_pd(rsq23);
682             rinv31           = gmx_mm_invsqrt_pd(rsq31);
683             rinv32           = gmx_mm_invsqrt_pd(rsq32);
684             rinv33           = gmx_mm_invsqrt_pd(rsq33);
685
686             rinvsq00         = gmx_mm_inv_pd(rsq00);
687             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
688             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
689             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
690             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
691             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
692             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
693             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
694             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
695             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
696
697             fjx0             = _mm_setzero_pd();
698             fjy0             = _mm_setzero_pd();
699             fjz0             = _mm_setzero_pd();
700             fjx1             = _mm_setzero_pd();
701             fjy1             = _mm_setzero_pd();
702             fjz1             = _mm_setzero_pd();
703             fjx2             = _mm_setzero_pd();
704             fjy2             = _mm_setzero_pd();
705             fjz2             = _mm_setzero_pd();
706             fjx3             = _mm_setzero_pd();
707             fjy3             = _mm_setzero_pd();
708             fjz3             = _mm_setzero_pd();
709
710             /**************************
711              * CALCULATE INTERACTIONS *
712              **************************/
713
714             if (gmx_mm_any_lt(rsq00,rcutoff2))
715             {
716
717             /* LENNARD-JONES DISPERSION/REPULSION */
718
719             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
720             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
721             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
722             vvdw             = _mm_msub_pd(_mm_nmacc_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
723                                            _mm_mul_pd(_mm_nmacc_pd( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
724             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
725
726             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
727
728             /* Update potential sum for this i atom from the interaction with this j atom. */
729             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
730             vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
731             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
732
733             fscal            = fvdw;
734
735             fscal            = _mm_and_pd(fscal,cutoff_mask);
736
737             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
738
739             /* Update vectorial force */
740             fix0             = _mm_macc_pd(dx00,fscal,fix0);
741             fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
742             fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
743             
744             fjx0             = _mm_macc_pd(dx00,fscal,fjx0);
745             fjy0             = _mm_macc_pd(dy00,fscal,fjy0);
746             fjz0             = _mm_macc_pd(dz00,fscal,fjz0);
747
748             }
749
750             /**************************
751              * CALCULATE INTERACTIONS *
752              **************************/
753
754             if (gmx_mm_any_lt(rsq11,rcutoff2))
755             {
756
757             /* REACTION-FIELD ELECTROSTATICS */
758             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_macc_pd(krf,rsq11,rinv11),crf));
759             felec            = _mm_mul_pd(qq11,_mm_msub_pd(rinv11,rinvsq11,krf2));
760
761             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
762
763             /* Update potential sum for this i atom from the interaction with this j atom. */
764             velec            = _mm_and_pd(velec,cutoff_mask);
765             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
766             velecsum         = _mm_add_pd(velecsum,velec);
767
768             fscal            = felec;
769
770             fscal            = _mm_and_pd(fscal,cutoff_mask);
771
772             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
773
774             /* Update vectorial force */
775             fix1             = _mm_macc_pd(dx11,fscal,fix1);
776             fiy1             = _mm_macc_pd(dy11,fscal,fiy1);
777             fiz1             = _mm_macc_pd(dz11,fscal,fiz1);
778             
779             fjx1             = _mm_macc_pd(dx11,fscal,fjx1);
780             fjy1             = _mm_macc_pd(dy11,fscal,fjy1);
781             fjz1             = _mm_macc_pd(dz11,fscal,fjz1);
782
783             }
784
785             /**************************
786              * CALCULATE INTERACTIONS *
787              **************************/
788
789             if (gmx_mm_any_lt(rsq12,rcutoff2))
790             {
791
792             /* REACTION-FIELD ELECTROSTATICS */
793             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_macc_pd(krf,rsq12,rinv12),crf));
794             felec            = _mm_mul_pd(qq12,_mm_msub_pd(rinv12,rinvsq12,krf2));
795
796             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
797
798             /* Update potential sum for this i atom from the interaction with this j atom. */
799             velec            = _mm_and_pd(velec,cutoff_mask);
800             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
801             velecsum         = _mm_add_pd(velecsum,velec);
802
803             fscal            = felec;
804
805             fscal            = _mm_and_pd(fscal,cutoff_mask);
806
807             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
808
809             /* Update vectorial force */
810             fix1             = _mm_macc_pd(dx12,fscal,fix1);
811             fiy1             = _mm_macc_pd(dy12,fscal,fiy1);
812             fiz1             = _mm_macc_pd(dz12,fscal,fiz1);
813             
814             fjx2             = _mm_macc_pd(dx12,fscal,fjx2);
815             fjy2             = _mm_macc_pd(dy12,fscal,fjy2);
816             fjz2             = _mm_macc_pd(dz12,fscal,fjz2);
817
818             }
819
820             /**************************
821              * CALCULATE INTERACTIONS *
822              **************************/
823
824             if (gmx_mm_any_lt(rsq13,rcutoff2))
825             {
826
827             /* REACTION-FIELD ELECTROSTATICS */
828             velec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_macc_pd(krf,rsq13,rinv13),crf));
829             felec            = _mm_mul_pd(qq13,_mm_msub_pd(rinv13,rinvsq13,krf2));
830
831             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
832
833             /* Update potential sum for this i atom from the interaction with this j atom. */
834             velec            = _mm_and_pd(velec,cutoff_mask);
835             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
836             velecsum         = _mm_add_pd(velecsum,velec);
837
838             fscal            = felec;
839
840             fscal            = _mm_and_pd(fscal,cutoff_mask);
841
842             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
843
844             /* Update vectorial force */
845             fix1             = _mm_macc_pd(dx13,fscal,fix1);
846             fiy1             = _mm_macc_pd(dy13,fscal,fiy1);
847             fiz1             = _mm_macc_pd(dz13,fscal,fiz1);
848             
849             fjx3             = _mm_macc_pd(dx13,fscal,fjx3);
850             fjy3             = _mm_macc_pd(dy13,fscal,fjy3);
851             fjz3             = _mm_macc_pd(dz13,fscal,fjz3);
852
853             }
854
855             /**************************
856              * CALCULATE INTERACTIONS *
857              **************************/
858
859             if (gmx_mm_any_lt(rsq21,rcutoff2))
860             {
861
862             /* REACTION-FIELD ELECTROSTATICS */
863             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_macc_pd(krf,rsq21,rinv21),crf));
864             felec            = _mm_mul_pd(qq21,_mm_msub_pd(rinv21,rinvsq21,krf2));
865
866             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
867
868             /* Update potential sum for this i atom from the interaction with this j atom. */
869             velec            = _mm_and_pd(velec,cutoff_mask);
870             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
871             velecsum         = _mm_add_pd(velecsum,velec);
872
873             fscal            = felec;
874
875             fscal            = _mm_and_pd(fscal,cutoff_mask);
876
877             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
878
879             /* Update vectorial force */
880             fix2             = _mm_macc_pd(dx21,fscal,fix2);
881             fiy2             = _mm_macc_pd(dy21,fscal,fiy2);
882             fiz2             = _mm_macc_pd(dz21,fscal,fiz2);
883             
884             fjx1             = _mm_macc_pd(dx21,fscal,fjx1);
885             fjy1             = _mm_macc_pd(dy21,fscal,fjy1);
886             fjz1             = _mm_macc_pd(dz21,fscal,fjz1);
887
888             }
889
890             /**************************
891              * CALCULATE INTERACTIONS *
892              **************************/
893
894             if (gmx_mm_any_lt(rsq22,rcutoff2))
895             {
896
897             /* REACTION-FIELD ELECTROSTATICS */
898             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_macc_pd(krf,rsq22,rinv22),crf));
899             felec            = _mm_mul_pd(qq22,_mm_msub_pd(rinv22,rinvsq22,krf2));
900
901             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
902
903             /* Update potential sum for this i atom from the interaction with this j atom. */
904             velec            = _mm_and_pd(velec,cutoff_mask);
905             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
906             velecsum         = _mm_add_pd(velecsum,velec);
907
908             fscal            = felec;
909
910             fscal            = _mm_and_pd(fscal,cutoff_mask);
911
912             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
913
914             /* Update vectorial force */
915             fix2             = _mm_macc_pd(dx22,fscal,fix2);
916             fiy2             = _mm_macc_pd(dy22,fscal,fiy2);
917             fiz2             = _mm_macc_pd(dz22,fscal,fiz2);
918             
919             fjx2             = _mm_macc_pd(dx22,fscal,fjx2);
920             fjy2             = _mm_macc_pd(dy22,fscal,fjy2);
921             fjz2             = _mm_macc_pd(dz22,fscal,fjz2);
922
923             }
924
925             /**************************
926              * CALCULATE INTERACTIONS *
927              **************************/
928
929             if (gmx_mm_any_lt(rsq23,rcutoff2))
930             {
931
932             /* REACTION-FIELD ELECTROSTATICS */
933             velec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_macc_pd(krf,rsq23,rinv23),crf));
934             felec            = _mm_mul_pd(qq23,_mm_msub_pd(rinv23,rinvsq23,krf2));
935
936             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
937
938             /* Update potential sum for this i atom from the interaction with this j atom. */
939             velec            = _mm_and_pd(velec,cutoff_mask);
940             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
941             velecsum         = _mm_add_pd(velecsum,velec);
942
943             fscal            = felec;
944
945             fscal            = _mm_and_pd(fscal,cutoff_mask);
946
947             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
948
949             /* Update vectorial force */
950             fix2             = _mm_macc_pd(dx23,fscal,fix2);
951             fiy2             = _mm_macc_pd(dy23,fscal,fiy2);
952             fiz2             = _mm_macc_pd(dz23,fscal,fiz2);
953             
954             fjx3             = _mm_macc_pd(dx23,fscal,fjx3);
955             fjy3             = _mm_macc_pd(dy23,fscal,fjy3);
956             fjz3             = _mm_macc_pd(dz23,fscal,fjz3);
957
958             }
959
960             /**************************
961              * CALCULATE INTERACTIONS *
962              **************************/
963
964             if (gmx_mm_any_lt(rsq31,rcutoff2))
965             {
966
967             /* REACTION-FIELD ELECTROSTATICS */
968             velec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_macc_pd(krf,rsq31,rinv31),crf));
969             felec            = _mm_mul_pd(qq31,_mm_msub_pd(rinv31,rinvsq31,krf2));
970
971             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
972
973             /* Update potential sum for this i atom from the interaction with this j atom. */
974             velec            = _mm_and_pd(velec,cutoff_mask);
975             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
976             velecsum         = _mm_add_pd(velecsum,velec);
977
978             fscal            = felec;
979
980             fscal            = _mm_and_pd(fscal,cutoff_mask);
981
982             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
983
984             /* Update vectorial force */
985             fix3             = _mm_macc_pd(dx31,fscal,fix3);
986             fiy3             = _mm_macc_pd(dy31,fscal,fiy3);
987             fiz3             = _mm_macc_pd(dz31,fscal,fiz3);
988             
989             fjx1             = _mm_macc_pd(dx31,fscal,fjx1);
990             fjy1             = _mm_macc_pd(dy31,fscal,fjy1);
991             fjz1             = _mm_macc_pd(dz31,fscal,fjz1);
992
993             }
994
995             /**************************
996              * CALCULATE INTERACTIONS *
997              **************************/
998
999             if (gmx_mm_any_lt(rsq32,rcutoff2))
1000             {
1001
1002             /* REACTION-FIELD ELECTROSTATICS */
1003             velec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_macc_pd(krf,rsq32,rinv32),crf));
1004             felec            = _mm_mul_pd(qq32,_mm_msub_pd(rinv32,rinvsq32,krf2));
1005
1006             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1007
1008             /* Update potential sum for this i atom from the interaction with this j atom. */
1009             velec            = _mm_and_pd(velec,cutoff_mask);
1010             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1011             velecsum         = _mm_add_pd(velecsum,velec);
1012
1013             fscal            = felec;
1014
1015             fscal            = _mm_and_pd(fscal,cutoff_mask);
1016
1017             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1018
1019             /* Update vectorial force */
1020             fix3             = _mm_macc_pd(dx32,fscal,fix3);
1021             fiy3             = _mm_macc_pd(dy32,fscal,fiy3);
1022             fiz3             = _mm_macc_pd(dz32,fscal,fiz3);
1023             
1024             fjx2             = _mm_macc_pd(dx32,fscal,fjx2);
1025             fjy2             = _mm_macc_pd(dy32,fscal,fjy2);
1026             fjz2             = _mm_macc_pd(dz32,fscal,fjz2);
1027
1028             }
1029
1030             /**************************
1031              * CALCULATE INTERACTIONS *
1032              **************************/
1033
1034             if (gmx_mm_any_lt(rsq33,rcutoff2))
1035             {
1036
1037             /* REACTION-FIELD ELECTROSTATICS */
1038             velec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_macc_pd(krf,rsq33,rinv33),crf));
1039             felec            = _mm_mul_pd(qq33,_mm_msub_pd(rinv33,rinvsq33,krf2));
1040
1041             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1042
1043             /* Update potential sum for this i atom from the interaction with this j atom. */
1044             velec            = _mm_and_pd(velec,cutoff_mask);
1045             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1046             velecsum         = _mm_add_pd(velecsum,velec);
1047
1048             fscal            = felec;
1049
1050             fscal            = _mm_and_pd(fscal,cutoff_mask);
1051
1052             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1053
1054             /* Update vectorial force */
1055             fix3             = _mm_macc_pd(dx33,fscal,fix3);
1056             fiy3             = _mm_macc_pd(dy33,fscal,fiy3);
1057             fiz3             = _mm_macc_pd(dz33,fscal,fiz3);
1058             
1059             fjx3             = _mm_macc_pd(dx33,fscal,fjx3);
1060             fjy3             = _mm_macc_pd(dy33,fscal,fjy3);
1061             fjz3             = _mm_macc_pd(dz33,fscal,fjz3);
1062
1063             }
1064
1065             gmx_mm_decrement_4rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1066
1067             /* Inner loop uses 398 flops */
1068         }
1069
1070         /* End of innermost loop */
1071
1072         gmx_mm_update_iforce_4atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1073                                               f+i_coord_offset,fshift+i_shift_offset);
1074
1075         ggid                        = gid[iidx];
1076         /* Update potential energies */
1077         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
1078         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
1079
1080         /* Increment number of inner iterations */
1081         inneriter                  += j_index_end - j_index_start;
1082
1083         /* Outer loop uses 26 flops */
1084     }
1085
1086     /* Increment number of outer iterations */
1087     outeriter        += nri;
1088
1089     /* Update outer/inner flops */
1090
1091     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*398);
1092 }
1093 /*
1094  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_double
1095  * Electrostatics interaction: ReactionField
1096  * VdW interaction:            LennardJones
1097  * Geometry:                   Water4-Water4
1098  * Calculate force/pot:        Force
1099  */
1100 void
1101 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_128_fma_double
1102                     (t_nblist * gmx_restrict                nlist,
1103                      rvec * gmx_restrict                    xx,
1104                      rvec * gmx_restrict                    ff,
1105                      t_forcerec * gmx_restrict              fr,
1106                      t_mdatoms * gmx_restrict               mdatoms,
1107                      nb_kernel_data_t * gmx_restrict        kernel_data,
1108                      t_nrnb * gmx_restrict                  nrnb)
1109 {
1110     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1111      * just 0 for non-waters.
1112      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
1113      * jnr indices corresponding to data put in the four positions in the SIMD register.
1114      */
1115     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1116     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1117     int              jnrA,jnrB;
1118     int              j_coord_offsetA,j_coord_offsetB;
1119     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1120     real             rcutoff_scalar;
1121     real             *shiftvec,*fshift,*x,*f;
1122     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1123     int              vdwioffset0;
1124     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1125     int              vdwioffset1;
1126     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1127     int              vdwioffset2;
1128     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1129     int              vdwioffset3;
1130     __m128d          ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1131     int              vdwjidx0A,vdwjidx0B;
1132     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1133     int              vdwjidx1A,vdwjidx1B;
1134     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1135     int              vdwjidx2A,vdwjidx2B;
1136     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1137     int              vdwjidx3A,vdwjidx3B;
1138     __m128d          jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1139     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1140     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1141     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1142     __m128d          dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1143     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1144     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1145     __m128d          dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1146     __m128d          dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1147     __m128d          dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1148     __m128d          dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1149     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
1150     real             *charge;
1151     int              nvdwtype;
1152     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1153     int              *vdwtype;
1154     real             *vdwparam;
1155     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
1156     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
1157     __m128d          dummy_mask,cutoff_mask;
1158     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
1159     __m128d          one     = _mm_set1_pd(1.0);
1160     __m128d          two     = _mm_set1_pd(2.0);
1161     x                = xx[0];
1162     f                = ff[0];
1163
1164     nri              = nlist->nri;
1165     iinr             = nlist->iinr;
1166     jindex           = nlist->jindex;
1167     jjnr             = nlist->jjnr;
1168     shiftidx         = nlist->shift;
1169     gid              = nlist->gid;
1170     shiftvec         = fr->shift_vec[0];
1171     fshift           = fr->fshift[0];
1172     facel            = _mm_set1_pd(fr->epsfac);
1173     charge           = mdatoms->chargeA;
1174     krf              = _mm_set1_pd(fr->ic->k_rf);
1175     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
1176     crf              = _mm_set1_pd(fr->ic->c_rf);
1177     nvdwtype         = fr->ntype;
1178     vdwparam         = fr->nbfp;
1179     vdwtype          = mdatoms->typeA;
1180
1181     /* Setup water-specific parameters */
1182     inr              = nlist->iinr[0];
1183     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
1184     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
1185     iq3              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+3]));
1186     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1187
1188     jq1              = _mm_set1_pd(charge[inr+1]);
1189     jq2              = _mm_set1_pd(charge[inr+2]);
1190     jq3              = _mm_set1_pd(charge[inr+3]);
1191     vdwjidx0A        = 2*vdwtype[inr+0];
1192     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
1193     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
1194     qq11             = _mm_mul_pd(iq1,jq1);
1195     qq12             = _mm_mul_pd(iq1,jq2);
1196     qq13             = _mm_mul_pd(iq1,jq3);
1197     qq21             = _mm_mul_pd(iq2,jq1);
1198     qq22             = _mm_mul_pd(iq2,jq2);
1199     qq23             = _mm_mul_pd(iq2,jq3);
1200     qq31             = _mm_mul_pd(iq3,jq1);
1201     qq32             = _mm_mul_pd(iq3,jq2);
1202     qq33             = _mm_mul_pd(iq3,jq3);
1203
1204     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1205     rcutoff_scalar   = fr->rcoulomb;
1206     rcutoff          = _mm_set1_pd(rcutoff_scalar);
1207     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
1208
1209     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
1210     rvdw             = _mm_set1_pd(fr->rvdw);
1211
1212     /* Avoid stupid compiler warnings */
1213     jnrA = jnrB = 0;
1214     j_coord_offsetA = 0;
1215     j_coord_offsetB = 0;
1216
1217     outeriter        = 0;
1218     inneriter        = 0;
1219
1220     /* Start outer loop over neighborlists */
1221     for(iidx=0; iidx<nri; iidx++)
1222     {
1223         /* Load shift vector for this list */
1224         i_shift_offset   = DIM*shiftidx[iidx];
1225
1226         /* Load limits for loop over neighbors */
1227         j_index_start    = jindex[iidx];
1228         j_index_end      = jindex[iidx+1];
1229
1230         /* Get outer coordinate index */
1231         inr              = iinr[iidx];
1232         i_coord_offset   = DIM*inr;
1233
1234         /* Load i particle coords and add shift vector */
1235         gmx_mm_load_shift_and_4rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
1236                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1237
1238         fix0             = _mm_setzero_pd();
1239         fiy0             = _mm_setzero_pd();
1240         fiz0             = _mm_setzero_pd();
1241         fix1             = _mm_setzero_pd();
1242         fiy1             = _mm_setzero_pd();
1243         fiz1             = _mm_setzero_pd();
1244         fix2             = _mm_setzero_pd();
1245         fiy2             = _mm_setzero_pd();
1246         fiz2             = _mm_setzero_pd();
1247         fix3             = _mm_setzero_pd();
1248         fiy3             = _mm_setzero_pd();
1249         fiz3             = _mm_setzero_pd();
1250
1251         /* Start inner kernel loop */
1252         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1253         {
1254
1255             /* Get j neighbor index, and coordinate index */
1256             jnrA             = jjnr[jidx];
1257             jnrB             = jjnr[jidx+1];
1258             j_coord_offsetA  = DIM*jnrA;
1259             j_coord_offsetB  = DIM*jnrB;
1260
1261             /* load j atom coordinates */
1262             gmx_mm_load_4rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
1263                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1264                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1265
1266             /* Calculate displacement vector */
1267             dx00             = _mm_sub_pd(ix0,jx0);
1268             dy00             = _mm_sub_pd(iy0,jy0);
1269             dz00             = _mm_sub_pd(iz0,jz0);
1270             dx11             = _mm_sub_pd(ix1,jx1);
1271             dy11             = _mm_sub_pd(iy1,jy1);
1272             dz11             = _mm_sub_pd(iz1,jz1);
1273             dx12             = _mm_sub_pd(ix1,jx2);
1274             dy12             = _mm_sub_pd(iy1,jy2);
1275             dz12             = _mm_sub_pd(iz1,jz2);
1276             dx13             = _mm_sub_pd(ix1,jx3);
1277             dy13             = _mm_sub_pd(iy1,jy3);
1278             dz13             = _mm_sub_pd(iz1,jz3);
1279             dx21             = _mm_sub_pd(ix2,jx1);
1280             dy21             = _mm_sub_pd(iy2,jy1);
1281             dz21             = _mm_sub_pd(iz2,jz1);
1282             dx22             = _mm_sub_pd(ix2,jx2);
1283             dy22             = _mm_sub_pd(iy2,jy2);
1284             dz22             = _mm_sub_pd(iz2,jz2);
1285             dx23             = _mm_sub_pd(ix2,jx3);
1286             dy23             = _mm_sub_pd(iy2,jy3);
1287             dz23             = _mm_sub_pd(iz2,jz3);
1288             dx31             = _mm_sub_pd(ix3,jx1);
1289             dy31             = _mm_sub_pd(iy3,jy1);
1290             dz31             = _mm_sub_pd(iz3,jz1);
1291             dx32             = _mm_sub_pd(ix3,jx2);
1292             dy32             = _mm_sub_pd(iy3,jy2);
1293             dz32             = _mm_sub_pd(iz3,jz2);
1294             dx33             = _mm_sub_pd(ix3,jx3);
1295             dy33             = _mm_sub_pd(iy3,jy3);
1296             dz33             = _mm_sub_pd(iz3,jz3);
1297
1298             /* Calculate squared distance and things based on it */
1299             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1300             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1301             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1302             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
1303             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1304             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1305             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
1306             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
1307             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
1308             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
1309
1310             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1311             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1312             rinv13           = gmx_mm_invsqrt_pd(rsq13);
1313             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1314             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1315             rinv23           = gmx_mm_invsqrt_pd(rsq23);
1316             rinv31           = gmx_mm_invsqrt_pd(rsq31);
1317             rinv32           = gmx_mm_invsqrt_pd(rsq32);
1318             rinv33           = gmx_mm_invsqrt_pd(rsq33);
1319
1320             rinvsq00         = gmx_mm_inv_pd(rsq00);
1321             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1322             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1323             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
1324             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1325             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1326             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
1327             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
1328             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
1329             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
1330
1331             fjx0             = _mm_setzero_pd();
1332             fjy0             = _mm_setzero_pd();
1333             fjz0             = _mm_setzero_pd();
1334             fjx1             = _mm_setzero_pd();
1335             fjy1             = _mm_setzero_pd();
1336             fjz1             = _mm_setzero_pd();
1337             fjx2             = _mm_setzero_pd();
1338             fjy2             = _mm_setzero_pd();
1339             fjz2             = _mm_setzero_pd();
1340             fjx3             = _mm_setzero_pd();
1341             fjy3             = _mm_setzero_pd();
1342             fjz3             = _mm_setzero_pd();
1343
1344             /**************************
1345              * CALCULATE INTERACTIONS *
1346              **************************/
1347
1348             if (gmx_mm_any_lt(rsq00,rcutoff2))
1349             {
1350
1351             /* LENNARD-JONES DISPERSION/REPULSION */
1352
1353             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1354             fvdw             = _mm_mul_pd(_mm_msub_pd(c12_00,rinvsix,c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1355
1356             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1357
1358             fscal            = fvdw;
1359
1360             fscal            = _mm_and_pd(fscal,cutoff_mask);
1361
1362             /* Update vectorial force */
1363             fix0             = _mm_macc_pd(dx00,fscal,fix0);
1364             fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
1365             fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
1366             
1367             fjx0             = _mm_macc_pd(dx00,fscal,fjx0);
1368             fjy0             = _mm_macc_pd(dy00,fscal,fjy0);
1369             fjz0             = _mm_macc_pd(dz00,fscal,fjz0);
1370
1371             }
1372
1373             /**************************
1374              * CALCULATE INTERACTIONS *
1375              **************************/
1376
1377             if (gmx_mm_any_lt(rsq11,rcutoff2))
1378             {
1379
1380             /* REACTION-FIELD ELECTROSTATICS */
1381             felec            = _mm_mul_pd(qq11,_mm_msub_pd(rinv11,rinvsq11,krf2));
1382
1383             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1384
1385             fscal            = felec;
1386
1387             fscal            = _mm_and_pd(fscal,cutoff_mask);
1388
1389             /* Update vectorial force */
1390             fix1             = _mm_macc_pd(dx11,fscal,fix1);
1391             fiy1             = _mm_macc_pd(dy11,fscal,fiy1);
1392             fiz1             = _mm_macc_pd(dz11,fscal,fiz1);
1393             
1394             fjx1             = _mm_macc_pd(dx11,fscal,fjx1);
1395             fjy1             = _mm_macc_pd(dy11,fscal,fjy1);
1396             fjz1             = _mm_macc_pd(dz11,fscal,fjz1);
1397
1398             }
1399
1400             /**************************
1401              * CALCULATE INTERACTIONS *
1402              **************************/
1403
1404             if (gmx_mm_any_lt(rsq12,rcutoff2))
1405             {
1406
1407             /* REACTION-FIELD ELECTROSTATICS */
1408             felec            = _mm_mul_pd(qq12,_mm_msub_pd(rinv12,rinvsq12,krf2));
1409
1410             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1411
1412             fscal            = felec;
1413
1414             fscal            = _mm_and_pd(fscal,cutoff_mask);
1415
1416             /* Update vectorial force */
1417             fix1             = _mm_macc_pd(dx12,fscal,fix1);
1418             fiy1             = _mm_macc_pd(dy12,fscal,fiy1);
1419             fiz1             = _mm_macc_pd(dz12,fscal,fiz1);
1420             
1421             fjx2             = _mm_macc_pd(dx12,fscal,fjx2);
1422             fjy2             = _mm_macc_pd(dy12,fscal,fjy2);
1423             fjz2             = _mm_macc_pd(dz12,fscal,fjz2);
1424
1425             }
1426
1427             /**************************
1428              * CALCULATE INTERACTIONS *
1429              **************************/
1430
1431             if (gmx_mm_any_lt(rsq13,rcutoff2))
1432             {
1433
1434             /* REACTION-FIELD ELECTROSTATICS */
1435             felec            = _mm_mul_pd(qq13,_mm_msub_pd(rinv13,rinvsq13,krf2));
1436
1437             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
1438
1439             fscal            = felec;
1440
1441             fscal            = _mm_and_pd(fscal,cutoff_mask);
1442
1443             /* Update vectorial force */
1444             fix1             = _mm_macc_pd(dx13,fscal,fix1);
1445             fiy1             = _mm_macc_pd(dy13,fscal,fiy1);
1446             fiz1             = _mm_macc_pd(dz13,fscal,fiz1);
1447             
1448             fjx3             = _mm_macc_pd(dx13,fscal,fjx3);
1449             fjy3             = _mm_macc_pd(dy13,fscal,fjy3);
1450             fjz3             = _mm_macc_pd(dz13,fscal,fjz3);
1451
1452             }
1453
1454             /**************************
1455              * CALCULATE INTERACTIONS *
1456              **************************/
1457
1458             if (gmx_mm_any_lt(rsq21,rcutoff2))
1459             {
1460
1461             /* REACTION-FIELD ELECTROSTATICS */
1462             felec            = _mm_mul_pd(qq21,_mm_msub_pd(rinv21,rinvsq21,krf2));
1463
1464             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1465
1466             fscal            = felec;
1467
1468             fscal            = _mm_and_pd(fscal,cutoff_mask);
1469
1470             /* Update vectorial force */
1471             fix2             = _mm_macc_pd(dx21,fscal,fix2);
1472             fiy2             = _mm_macc_pd(dy21,fscal,fiy2);
1473             fiz2             = _mm_macc_pd(dz21,fscal,fiz2);
1474             
1475             fjx1             = _mm_macc_pd(dx21,fscal,fjx1);
1476             fjy1             = _mm_macc_pd(dy21,fscal,fjy1);
1477             fjz1             = _mm_macc_pd(dz21,fscal,fjz1);
1478
1479             }
1480
1481             /**************************
1482              * CALCULATE INTERACTIONS *
1483              **************************/
1484
1485             if (gmx_mm_any_lt(rsq22,rcutoff2))
1486             {
1487
1488             /* REACTION-FIELD ELECTROSTATICS */
1489             felec            = _mm_mul_pd(qq22,_mm_msub_pd(rinv22,rinvsq22,krf2));
1490
1491             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
1492
1493             fscal            = felec;
1494
1495             fscal            = _mm_and_pd(fscal,cutoff_mask);
1496
1497             /* Update vectorial force */
1498             fix2             = _mm_macc_pd(dx22,fscal,fix2);
1499             fiy2             = _mm_macc_pd(dy22,fscal,fiy2);
1500             fiz2             = _mm_macc_pd(dz22,fscal,fiz2);
1501             
1502             fjx2             = _mm_macc_pd(dx22,fscal,fjx2);
1503             fjy2             = _mm_macc_pd(dy22,fscal,fjy2);
1504             fjz2             = _mm_macc_pd(dz22,fscal,fjz2);
1505
1506             }
1507
1508             /**************************
1509              * CALCULATE INTERACTIONS *
1510              **************************/
1511
1512             if (gmx_mm_any_lt(rsq23,rcutoff2))
1513             {
1514
1515             /* REACTION-FIELD ELECTROSTATICS */
1516             felec            = _mm_mul_pd(qq23,_mm_msub_pd(rinv23,rinvsq23,krf2));
1517
1518             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
1519
1520             fscal            = felec;
1521
1522             fscal            = _mm_and_pd(fscal,cutoff_mask);
1523
1524             /* Update vectorial force */
1525             fix2             = _mm_macc_pd(dx23,fscal,fix2);
1526             fiy2             = _mm_macc_pd(dy23,fscal,fiy2);
1527             fiz2             = _mm_macc_pd(dz23,fscal,fiz2);
1528             
1529             fjx3             = _mm_macc_pd(dx23,fscal,fjx3);
1530             fjy3             = _mm_macc_pd(dy23,fscal,fjy3);
1531             fjz3             = _mm_macc_pd(dz23,fscal,fjz3);
1532
1533             }
1534
1535             /**************************
1536              * CALCULATE INTERACTIONS *
1537              **************************/
1538
1539             if (gmx_mm_any_lt(rsq31,rcutoff2))
1540             {
1541
1542             /* REACTION-FIELD ELECTROSTATICS */
1543             felec            = _mm_mul_pd(qq31,_mm_msub_pd(rinv31,rinvsq31,krf2));
1544
1545             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
1546
1547             fscal            = felec;
1548
1549             fscal            = _mm_and_pd(fscal,cutoff_mask);
1550
1551             /* Update vectorial force */
1552             fix3             = _mm_macc_pd(dx31,fscal,fix3);
1553             fiy3             = _mm_macc_pd(dy31,fscal,fiy3);
1554             fiz3             = _mm_macc_pd(dz31,fscal,fiz3);
1555             
1556             fjx1             = _mm_macc_pd(dx31,fscal,fjx1);
1557             fjy1             = _mm_macc_pd(dy31,fscal,fjy1);
1558             fjz1             = _mm_macc_pd(dz31,fscal,fjz1);
1559
1560             }
1561
1562             /**************************
1563              * CALCULATE INTERACTIONS *
1564              **************************/
1565
1566             if (gmx_mm_any_lt(rsq32,rcutoff2))
1567             {
1568
1569             /* REACTION-FIELD ELECTROSTATICS */
1570             felec            = _mm_mul_pd(qq32,_mm_msub_pd(rinv32,rinvsq32,krf2));
1571
1572             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1573
1574             fscal            = felec;
1575
1576             fscal            = _mm_and_pd(fscal,cutoff_mask);
1577
1578             /* Update vectorial force */
1579             fix3             = _mm_macc_pd(dx32,fscal,fix3);
1580             fiy3             = _mm_macc_pd(dy32,fscal,fiy3);
1581             fiz3             = _mm_macc_pd(dz32,fscal,fiz3);
1582             
1583             fjx2             = _mm_macc_pd(dx32,fscal,fjx2);
1584             fjy2             = _mm_macc_pd(dy32,fscal,fjy2);
1585             fjz2             = _mm_macc_pd(dz32,fscal,fjz2);
1586
1587             }
1588
1589             /**************************
1590              * CALCULATE INTERACTIONS *
1591              **************************/
1592
1593             if (gmx_mm_any_lt(rsq33,rcutoff2))
1594             {
1595
1596             /* REACTION-FIELD ELECTROSTATICS */
1597             felec            = _mm_mul_pd(qq33,_mm_msub_pd(rinv33,rinvsq33,krf2));
1598
1599             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1600
1601             fscal            = felec;
1602
1603             fscal            = _mm_and_pd(fscal,cutoff_mask);
1604
1605             /* Update vectorial force */
1606             fix3             = _mm_macc_pd(dx33,fscal,fix3);
1607             fiy3             = _mm_macc_pd(dy33,fscal,fiy3);
1608             fiz3             = _mm_macc_pd(dz33,fscal,fiz3);
1609             
1610             fjx3             = _mm_macc_pd(dx33,fscal,fjx3);
1611             fjy3             = _mm_macc_pd(dy33,fscal,fjy3);
1612             fjz3             = _mm_macc_pd(dz33,fscal,fjz3);
1613
1614             }
1615
1616             gmx_mm_decrement_4rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1617
1618             /* Inner loop uses 333 flops */
1619         }
1620
1621         if(jidx<j_index_end)
1622         {
1623
1624             jnrA             = jjnr[jidx];
1625             j_coord_offsetA  = DIM*jnrA;
1626
1627             /* load j atom coordinates */
1628             gmx_mm_load_4rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
1629                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1630                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1631
1632             /* Calculate displacement vector */
1633             dx00             = _mm_sub_pd(ix0,jx0);
1634             dy00             = _mm_sub_pd(iy0,jy0);
1635             dz00             = _mm_sub_pd(iz0,jz0);
1636             dx11             = _mm_sub_pd(ix1,jx1);
1637             dy11             = _mm_sub_pd(iy1,jy1);
1638             dz11             = _mm_sub_pd(iz1,jz1);
1639             dx12             = _mm_sub_pd(ix1,jx2);
1640             dy12             = _mm_sub_pd(iy1,jy2);
1641             dz12             = _mm_sub_pd(iz1,jz2);
1642             dx13             = _mm_sub_pd(ix1,jx3);
1643             dy13             = _mm_sub_pd(iy1,jy3);
1644             dz13             = _mm_sub_pd(iz1,jz3);
1645             dx21             = _mm_sub_pd(ix2,jx1);
1646             dy21             = _mm_sub_pd(iy2,jy1);
1647             dz21             = _mm_sub_pd(iz2,jz1);
1648             dx22             = _mm_sub_pd(ix2,jx2);
1649             dy22             = _mm_sub_pd(iy2,jy2);
1650             dz22             = _mm_sub_pd(iz2,jz2);
1651             dx23             = _mm_sub_pd(ix2,jx3);
1652             dy23             = _mm_sub_pd(iy2,jy3);
1653             dz23             = _mm_sub_pd(iz2,jz3);
1654             dx31             = _mm_sub_pd(ix3,jx1);
1655             dy31             = _mm_sub_pd(iy3,jy1);
1656             dz31             = _mm_sub_pd(iz3,jz1);
1657             dx32             = _mm_sub_pd(ix3,jx2);
1658             dy32             = _mm_sub_pd(iy3,jy2);
1659             dz32             = _mm_sub_pd(iz3,jz2);
1660             dx33             = _mm_sub_pd(ix3,jx3);
1661             dy33             = _mm_sub_pd(iy3,jy3);
1662             dz33             = _mm_sub_pd(iz3,jz3);
1663
1664             /* Calculate squared distance and things based on it */
1665             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1666             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1667             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1668             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
1669             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1670             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1671             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
1672             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
1673             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
1674             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
1675
1676             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1677             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1678             rinv13           = gmx_mm_invsqrt_pd(rsq13);
1679             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1680             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1681             rinv23           = gmx_mm_invsqrt_pd(rsq23);
1682             rinv31           = gmx_mm_invsqrt_pd(rsq31);
1683             rinv32           = gmx_mm_invsqrt_pd(rsq32);
1684             rinv33           = gmx_mm_invsqrt_pd(rsq33);
1685
1686             rinvsq00         = gmx_mm_inv_pd(rsq00);
1687             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1688             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1689             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
1690             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1691             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1692             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
1693             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
1694             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
1695             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
1696
1697             fjx0             = _mm_setzero_pd();
1698             fjy0             = _mm_setzero_pd();
1699             fjz0             = _mm_setzero_pd();
1700             fjx1             = _mm_setzero_pd();
1701             fjy1             = _mm_setzero_pd();
1702             fjz1             = _mm_setzero_pd();
1703             fjx2             = _mm_setzero_pd();
1704             fjy2             = _mm_setzero_pd();
1705             fjz2             = _mm_setzero_pd();
1706             fjx3             = _mm_setzero_pd();
1707             fjy3             = _mm_setzero_pd();
1708             fjz3             = _mm_setzero_pd();
1709
1710             /**************************
1711              * CALCULATE INTERACTIONS *
1712              **************************/
1713
1714             if (gmx_mm_any_lt(rsq00,rcutoff2))
1715             {
1716
1717             /* LENNARD-JONES DISPERSION/REPULSION */
1718
1719             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1720             fvdw             = _mm_mul_pd(_mm_msub_pd(c12_00,rinvsix,c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1721
1722             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1723
1724             fscal            = fvdw;
1725
1726             fscal            = _mm_and_pd(fscal,cutoff_mask);
1727
1728             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1729
1730             /* Update vectorial force */
1731             fix0             = _mm_macc_pd(dx00,fscal,fix0);
1732             fiy0             = _mm_macc_pd(dy00,fscal,fiy0);
1733             fiz0             = _mm_macc_pd(dz00,fscal,fiz0);
1734             
1735             fjx0             = _mm_macc_pd(dx00,fscal,fjx0);
1736             fjy0             = _mm_macc_pd(dy00,fscal,fjy0);
1737             fjz0             = _mm_macc_pd(dz00,fscal,fjz0);
1738
1739             }
1740
1741             /**************************
1742              * CALCULATE INTERACTIONS *
1743              **************************/
1744
1745             if (gmx_mm_any_lt(rsq11,rcutoff2))
1746             {
1747
1748             /* REACTION-FIELD ELECTROSTATICS */
1749             felec            = _mm_mul_pd(qq11,_mm_msub_pd(rinv11,rinvsq11,krf2));
1750
1751             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1752
1753             fscal            = felec;
1754
1755             fscal            = _mm_and_pd(fscal,cutoff_mask);
1756
1757             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1758
1759             /* Update vectorial force */
1760             fix1             = _mm_macc_pd(dx11,fscal,fix1);
1761             fiy1             = _mm_macc_pd(dy11,fscal,fiy1);
1762             fiz1             = _mm_macc_pd(dz11,fscal,fiz1);
1763             
1764             fjx1             = _mm_macc_pd(dx11,fscal,fjx1);
1765             fjy1             = _mm_macc_pd(dy11,fscal,fjy1);
1766             fjz1             = _mm_macc_pd(dz11,fscal,fjz1);
1767
1768             }
1769
1770             /**************************
1771              * CALCULATE INTERACTIONS *
1772              **************************/
1773
1774             if (gmx_mm_any_lt(rsq12,rcutoff2))
1775             {
1776
1777             /* REACTION-FIELD ELECTROSTATICS */
1778             felec            = _mm_mul_pd(qq12,_mm_msub_pd(rinv12,rinvsq12,krf2));
1779
1780             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1781
1782             fscal            = felec;
1783
1784             fscal            = _mm_and_pd(fscal,cutoff_mask);
1785
1786             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1787
1788             /* Update vectorial force */
1789             fix1             = _mm_macc_pd(dx12,fscal,fix1);
1790             fiy1             = _mm_macc_pd(dy12,fscal,fiy1);
1791             fiz1             = _mm_macc_pd(dz12,fscal,fiz1);
1792             
1793             fjx2             = _mm_macc_pd(dx12,fscal,fjx2);
1794             fjy2             = _mm_macc_pd(dy12,fscal,fjy2);
1795             fjz2             = _mm_macc_pd(dz12,fscal,fjz2);
1796
1797             }
1798
1799             /**************************
1800              * CALCULATE INTERACTIONS *
1801              **************************/
1802
1803             if (gmx_mm_any_lt(rsq13,rcutoff2))
1804             {
1805
1806             /* REACTION-FIELD ELECTROSTATICS */
1807             felec            = _mm_mul_pd(qq13,_mm_msub_pd(rinv13,rinvsq13,krf2));
1808
1809             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
1810
1811             fscal            = felec;
1812
1813             fscal            = _mm_and_pd(fscal,cutoff_mask);
1814
1815             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1816
1817             /* Update vectorial force */
1818             fix1             = _mm_macc_pd(dx13,fscal,fix1);
1819             fiy1             = _mm_macc_pd(dy13,fscal,fiy1);
1820             fiz1             = _mm_macc_pd(dz13,fscal,fiz1);
1821             
1822             fjx3             = _mm_macc_pd(dx13,fscal,fjx3);
1823             fjy3             = _mm_macc_pd(dy13,fscal,fjy3);
1824             fjz3             = _mm_macc_pd(dz13,fscal,fjz3);
1825
1826             }
1827
1828             /**************************
1829              * CALCULATE INTERACTIONS *
1830              **************************/
1831
1832             if (gmx_mm_any_lt(rsq21,rcutoff2))
1833             {
1834
1835             /* REACTION-FIELD ELECTROSTATICS */
1836             felec            = _mm_mul_pd(qq21,_mm_msub_pd(rinv21,rinvsq21,krf2));
1837
1838             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1839
1840             fscal            = felec;
1841
1842             fscal            = _mm_and_pd(fscal,cutoff_mask);
1843
1844             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1845
1846             /* Update vectorial force */
1847             fix2             = _mm_macc_pd(dx21,fscal,fix2);
1848             fiy2             = _mm_macc_pd(dy21,fscal,fiy2);
1849             fiz2             = _mm_macc_pd(dz21,fscal,fiz2);
1850             
1851             fjx1             = _mm_macc_pd(dx21,fscal,fjx1);
1852             fjy1             = _mm_macc_pd(dy21,fscal,fjy1);
1853             fjz1             = _mm_macc_pd(dz21,fscal,fjz1);
1854
1855             }
1856
1857             /**************************
1858              * CALCULATE INTERACTIONS *
1859              **************************/
1860
1861             if (gmx_mm_any_lt(rsq22,rcutoff2))
1862             {
1863
1864             /* REACTION-FIELD ELECTROSTATICS */
1865             felec            = _mm_mul_pd(qq22,_mm_msub_pd(rinv22,rinvsq22,krf2));
1866
1867             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
1868
1869             fscal            = felec;
1870
1871             fscal            = _mm_and_pd(fscal,cutoff_mask);
1872
1873             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1874
1875             /* Update vectorial force */
1876             fix2             = _mm_macc_pd(dx22,fscal,fix2);
1877             fiy2             = _mm_macc_pd(dy22,fscal,fiy2);
1878             fiz2             = _mm_macc_pd(dz22,fscal,fiz2);
1879             
1880             fjx2             = _mm_macc_pd(dx22,fscal,fjx2);
1881             fjy2             = _mm_macc_pd(dy22,fscal,fjy2);
1882             fjz2             = _mm_macc_pd(dz22,fscal,fjz2);
1883
1884             }
1885
1886             /**************************
1887              * CALCULATE INTERACTIONS *
1888              **************************/
1889
1890             if (gmx_mm_any_lt(rsq23,rcutoff2))
1891             {
1892
1893             /* REACTION-FIELD ELECTROSTATICS */
1894             felec            = _mm_mul_pd(qq23,_mm_msub_pd(rinv23,rinvsq23,krf2));
1895
1896             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
1897
1898             fscal            = felec;
1899
1900             fscal            = _mm_and_pd(fscal,cutoff_mask);
1901
1902             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1903
1904             /* Update vectorial force */
1905             fix2             = _mm_macc_pd(dx23,fscal,fix2);
1906             fiy2             = _mm_macc_pd(dy23,fscal,fiy2);
1907             fiz2             = _mm_macc_pd(dz23,fscal,fiz2);
1908             
1909             fjx3             = _mm_macc_pd(dx23,fscal,fjx3);
1910             fjy3             = _mm_macc_pd(dy23,fscal,fjy3);
1911             fjz3             = _mm_macc_pd(dz23,fscal,fjz3);
1912
1913             }
1914
1915             /**************************
1916              * CALCULATE INTERACTIONS *
1917              **************************/
1918
1919             if (gmx_mm_any_lt(rsq31,rcutoff2))
1920             {
1921
1922             /* REACTION-FIELD ELECTROSTATICS */
1923             felec            = _mm_mul_pd(qq31,_mm_msub_pd(rinv31,rinvsq31,krf2));
1924
1925             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
1926
1927             fscal            = felec;
1928
1929             fscal            = _mm_and_pd(fscal,cutoff_mask);
1930
1931             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1932
1933             /* Update vectorial force */
1934             fix3             = _mm_macc_pd(dx31,fscal,fix3);
1935             fiy3             = _mm_macc_pd(dy31,fscal,fiy3);
1936             fiz3             = _mm_macc_pd(dz31,fscal,fiz3);
1937             
1938             fjx1             = _mm_macc_pd(dx31,fscal,fjx1);
1939             fjy1             = _mm_macc_pd(dy31,fscal,fjy1);
1940             fjz1             = _mm_macc_pd(dz31,fscal,fjz1);
1941
1942             }
1943
1944             /**************************
1945              * CALCULATE INTERACTIONS *
1946              **************************/
1947
1948             if (gmx_mm_any_lt(rsq32,rcutoff2))
1949             {
1950
1951             /* REACTION-FIELD ELECTROSTATICS */
1952             felec            = _mm_mul_pd(qq32,_mm_msub_pd(rinv32,rinvsq32,krf2));
1953
1954             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1955
1956             fscal            = felec;
1957
1958             fscal            = _mm_and_pd(fscal,cutoff_mask);
1959
1960             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1961
1962             /* Update vectorial force */
1963             fix3             = _mm_macc_pd(dx32,fscal,fix3);
1964             fiy3             = _mm_macc_pd(dy32,fscal,fiy3);
1965             fiz3             = _mm_macc_pd(dz32,fscal,fiz3);
1966             
1967             fjx2             = _mm_macc_pd(dx32,fscal,fjx2);
1968             fjy2             = _mm_macc_pd(dy32,fscal,fjy2);
1969             fjz2             = _mm_macc_pd(dz32,fscal,fjz2);
1970
1971             }
1972
1973             /**************************
1974              * CALCULATE INTERACTIONS *
1975              **************************/
1976
1977             if (gmx_mm_any_lt(rsq33,rcutoff2))
1978             {
1979
1980             /* REACTION-FIELD ELECTROSTATICS */
1981             felec            = _mm_mul_pd(qq33,_mm_msub_pd(rinv33,rinvsq33,krf2));
1982
1983             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1984
1985             fscal            = felec;
1986
1987             fscal            = _mm_and_pd(fscal,cutoff_mask);
1988
1989             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1990
1991             /* Update vectorial force */
1992             fix3             = _mm_macc_pd(dx33,fscal,fix3);
1993             fiy3             = _mm_macc_pd(dy33,fscal,fiy3);
1994             fiz3             = _mm_macc_pd(dz33,fscal,fiz3);
1995             
1996             fjx3             = _mm_macc_pd(dx33,fscal,fjx3);
1997             fjy3             = _mm_macc_pd(dy33,fscal,fjy3);
1998             fjz3             = _mm_macc_pd(dz33,fscal,fjz3);
1999
2000             }
2001
2002             gmx_mm_decrement_4rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2003
2004             /* Inner loop uses 333 flops */
2005         }
2006
2007         /* End of innermost loop */
2008
2009         gmx_mm_update_iforce_4atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2010                                               f+i_coord_offset,fshift+i_shift_offset);
2011
2012         /* Increment number of inner iterations */
2013         inneriter                  += j_index_end - j_index_start;
2014
2015         /* Outer loop uses 24 flops */
2016     }
2017
2018     /* Increment number of outer iterations */
2019     outeriter        += nri;
2020
2021     /* Update outer/inner flops */
2022
2023     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*333);
2024 }