Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sse4_1_double / nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sse4_1_double.c
1 /*
2  * Note: this file was generated by the Gromacs sse4_1_double kernel generator.
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  * Copyright (c) 2001-2012, The GROMACS Development Team
9  *
10  * Gromacs is a library for molecular simulation and trajectory analysis,
11  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
12  * a full list of developers and information, check out http://www.gromacs.org
13  *
14  * This program is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 2 of the License, or (at your option) any
17  * later version.
18  *
19  * To help fund GROMACS development, we humbly ask that you cite
20  * the papers people have written on it - you can find them on the website.
21  */
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <math.h>
27
28 #include "../nb_kernel.h"
29 #include "types/simple.h"
30 #include "vec.h"
31 #include "nrnb.h"
32
33 #include "gmx_math_x86_sse4_1_double.h"
34 #include "kernelutil_x86_sse4_1_double.h"
35
36 /*
37  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_double
38  * Electrostatics interaction: ReactionField
39  * VdW interaction:            LennardJones
40  * Geometry:                   Water4-Water4
41  * Calculate force/pot:        PotentialAndForce
42  */
43 void
44 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse4_1_double
45                     (t_nblist * gmx_restrict                nlist,
46                      rvec * gmx_restrict                    xx,
47                      rvec * gmx_restrict                    ff,
48                      t_forcerec * gmx_restrict              fr,
49                      t_mdatoms * gmx_restrict               mdatoms,
50                      nb_kernel_data_t * gmx_restrict        kernel_data,
51                      t_nrnb * gmx_restrict                  nrnb)
52 {
53     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
54      * just 0 for non-waters.
55      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
56      * jnr indices corresponding to data put in the four positions in the SIMD register.
57      */
58     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
59     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
60     int              jnrA,jnrB;
61     int              j_coord_offsetA,j_coord_offsetB;
62     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
63     real             rcutoff_scalar;
64     real             *shiftvec,*fshift,*x,*f;
65     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
66     int              vdwioffset0;
67     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
68     int              vdwioffset1;
69     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
70     int              vdwioffset2;
71     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
72     int              vdwioffset3;
73     __m128d          ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
74     int              vdwjidx0A,vdwjidx0B;
75     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
76     int              vdwjidx1A,vdwjidx1B;
77     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
78     int              vdwjidx2A,vdwjidx2B;
79     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
80     int              vdwjidx3A,vdwjidx3B;
81     __m128d          jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
82     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
83     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
84     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
85     __m128d          dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
86     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
87     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
88     __m128d          dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
89     __m128d          dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
90     __m128d          dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
91     __m128d          dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
92     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
93     real             *charge;
94     int              nvdwtype;
95     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
96     int              *vdwtype;
97     real             *vdwparam;
98     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
99     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
100     __m128d          dummy_mask,cutoff_mask;
101     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
102     __m128d          one     = _mm_set1_pd(1.0);
103     __m128d          two     = _mm_set1_pd(2.0);
104     x                = xx[0];
105     f                = ff[0];
106
107     nri              = nlist->nri;
108     iinr             = nlist->iinr;
109     jindex           = nlist->jindex;
110     jjnr             = nlist->jjnr;
111     shiftidx         = nlist->shift;
112     gid              = nlist->gid;
113     shiftvec         = fr->shift_vec[0];
114     fshift           = fr->fshift[0];
115     facel            = _mm_set1_pd(fr->epsfac);
116     charge           = mdatoms->chargeA;
117     krf              = _mm_set1_pd(fr->ic->k_rf);
118     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
119     crf              = _mm_set1_pd(fr->ic->c_rf);
120     nvdwtype         = fr->ntype;
121     vdwparam         = fr->nbfp;
122     vdwtype          = mdatoms->typeA;
123
124     /* Setup water-specific parameters */
125     inr              = nlist->iinr[0];
126     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
127     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
128     iq3              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+3]));
129     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
130
131     jq1              = _mm_set1_pd(charge[inr+1]);
132     jq2              = _mm_set1_pd(charge[inr+2]);
133     jq3              = _mm_set1_pd(charge[inr+3]);
134     vdwjidx0A        = 2*vdwtype[inr+0];
135     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
136     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
137     qq11             = _mm_mul_pd(iq1,jq1);
138     qq12             = _mm_mul_pd(iq1,jq2);
139     qq13             = _mm_mul_pd(iq1,jq3);
140     qq21             = _mm_mul_pd(iq2,jq1);
141     qq22             = _mm_mul_pd(iq2,jq2);
142     qq23             = _mm_mul_pd(iq2,jq3);
143     qq31             = _mm_mul_pd(iq3,jq1);
144     qq32             = _mm_mul_pd(iq3,jq2);
145     qq33             = _mm_mul_pd(iq3,jq3);
146
147     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
148     rcutoff_scalar   = fr->rcoulomb;
149     rcutoff          = _mm_set1_pd(rcutoff_scalar);
150     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
151
152     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
153     rvdw             = _mm_set1_pd(fr->rvdw);
154
155     /* Avoid stupid compiler warnings */
156     jnrA = jnrB = 0;
157     j_coord_offsetA = 0;
158     j_coord_offsetB = 0;
159
160     outeriter        = 0;
161     inneriter        = 0;
162
163     /* Start outer loop over neighborlists */
164     for(iidx=0; iidx<nri; iidx++)
165     {
166         /* Load shift vector for this list */
167         i_shift_offset   = DIM*shiftidx[iidx];
168
169         /* Load limits for loop over neighbors */
170         j_index_start    = jindex[iidx];
171         j_index_end      = jindex[iidx+1];
172
173         /* Get outer coordinate index */
174         inr              = iinr[iidx];
175         i_coord_offset   = DIM*inr;
176
177         /* Load i particle coords and add shift vector */
178         gmx_mm_load_shift_and_4rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
179                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
180
181         fix0             = _mm_setzero_pd();
182         fiy0             = _mm_setzero_pd();
183         fiz0             = _mm_setzero_pd();
184         fix1             = _mm_setzero_pd();
185         fiy1             = _mm_setzero_pd();
186         fiz1             = _mm_setzero_pd();
187         fix2             = _mm_setzero_pd();
188         fiy2             = _mm_setzero_pd();
189         fiz2             = _mm_setzero_pd();
190         fix3             = _mm_setzero_pd();
191         fiy3             = _mm_setzero_pd();
192         fiz3             = _mm_setzero_pd();
193
194         /* Reset potential sums */
195         velecsum         = _mm_setzero_pd();
196         vvdwsum          = _mm_setzero_pd();
197
198         /* Start inner kernel loop */
199         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
200         {
201
202             /* Get j neighbor index, and coordinate index */
203             jnrA             = jjnr[jidx];
204             jnrB             = jjnr[jidx+1];
205             j_coord_offsetA  = DIM*jnrA;
206             j_coord_offsetB  = DIM*jnrB;
207
208             /* load j atom coordinates */
209             gmx_mm_load_4rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
210                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
211                                               &jy2,&jz2,&jx3,&jy3,&jz3);
212
213             /* Calculate displacement vector */
214             dx00             = _mm_sub_pd(ix0,jx0);
215             dy00             = _mm_sub_pd(iy0,jy0);
216             dz00             = _mm_sub_pd(iz0,jz0);
217             dx11             = _mm_sub_pd(ix1,jx1);
218             dy11             = _mm_sub_pd(iy1,jy1);
219             dz11             = _mm_sub_pd(iz1,jz1);
220             dx12             = _mm_sub_pd(ix1,jx2);
221             dy12             = _mm_sub_pd(iy1,jy2);
222             dz12             = _mm_sub_pd(iz1,jz2);
223             dx13             = _mm_sub_pd(ix1,jx3);
224             dy13             = _mm_sub_pd(iy1,jy3);
225             dz13             = _mm_sub_pd(iz1,jz3);
226             dx21             = _mm_sub_pd(ix2,jx1);
227             dy21             = _mm_sub_pd(iy2,jy1);
228             dz21             = _mm_sub_pd(iz2,jz1);
229             dx22             = _mm_sub_pd(ix2,jx2);
230             dy22             = _mm_sub_pd(iy2,jy2);
231             dz22             = _mm_sub_pd(iz2,jz2);
232             dx23             = _mm_sub_pd(ix2,jx3);
233             dy23             = _mm_sub_pd(iy2,jy3);
234             dz23             = _mm_sub_pd(iz2,jz3);
235             dx31             = _mm_sub_pd(ix3,jx1);
236             dy31             = _mm_sub_pd(iy3,jy1);
237             dz31             = _mm_sub_pd(iz3,jz1);
238             dx32             = _mm_sub_pd(ix3,jx2);
239             dy32             = _mm_sub_pd(iy3,jy2);
240             dz32             = _mm_sub_pd(iz3,jz2);
241             dx33             = _mm_sub_pd(ix3,jx3);
242             dy33             = _mm_sub_pd(iy3,jy3);
243             dz33             = _mm_sub_pd(iz3,jz3);
244
245             /* Calculate squared distance and things based on it */
246             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
247             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
248             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
249             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
250             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
251             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
252             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
253             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
254             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
255             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
256
257             rinv11           = gmx_mm_invsqrt_pd(rsq11);
258             rinv12           = gmx_mm_invsqrt_pd(rsq12);
259             rinv13           = gmx_mm_invsqrt_pd(rsq13);
260             rinv21           = gmx_mm_invsqrt_pd(rsq21);
261             rinv22           = gmx_mm_invsqrt_pd(rsq22);
262             rinv23           = gmx_mm_invsqrt_pd(rsq23);
263             rinv31           = gmx_mm_invsqrt_pd(rsq31);
264             rinv32           = gmx_mm_invsqrt_pd(rsq32);
265             rinv33           = gmx_mm_invsqrt_pd(rsq33);
266
267             rinvsq00         = gmx_mm_inv_pd(rsq00);
268             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
269             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
270             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
271             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
272             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
273             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
274             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
275             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
276             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
277
278             fjx0             = _mm_setzero_pd();
279             fjy0             = _mm_setzero_pd();
280             fjz0             = _mm_setzero_pd();
281             fjx1             = _mm_setzero_pd();
282             fjy1             = _mm_setzero_pd();
283             fjz1             = _mm_setzero_pd();
284             fjx2             = _mm_setzero_pd();
285             fjy2             = _mm_setzero_pd();
286             fjz2             = _mm_setzero_pd();
287             fjx3             = _mm_setzero_pd();
288             fjy3             = _mm_setzero_pd();
289             fjz3             = _mm_setzero_pd();
290
291             /**************************
292              * CALCULATE INTERACTIONS *
293              **************************/
294
295             if (gmx_mm_any_lt(rsq00,rcutoff2))
296             {
297
298             /* LENNARD-JONES DISPERSION/REPULSION */
299
300             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
301             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
302             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
303             vvdw             = _mm_sub_pd(_mm_mul_pd( _mm_sub_pd(vvdw12 , _mm_mul_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6))), one_twelfth) ,
304                                           _mm_mul_pd( _mm_sub_pd(vvdw6,_mm_mul_pd(c6_00,sh_vdw_invrcut6)),one_sixth));
305             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
306
307             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
308
309             /* Update potential sum for this i atom from the interaction with this j atom. */
310             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
311             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
312
313             fscal            = fvdw;
314
315             fscal            = _mm_and_pd(fscal,cutoff_mask);
316
317             /* Calculate temporary vectorial force */
318             tx               = _mm_mul_pd(fscal,dx00);
319             ty               = _mm_mul_pd(fscal,dy00);
320             tz               = _mm_mul_pd(fscal,dz00);
321
322             /* Update vectorial force */
323             fix0             = _mm_add_pd(fix0,tx);
324             fiy0             = _mm_add_pd(fiy0,ty);
325             fiz0             = _mm_add_pd(fiz0,tz);
326
327             fjx0             = _mm_add_pd(fjx0,tx);
328             fjy0             = _mm_add_pd(fjy0,ty);
329             fjz0             = _mm_add_pd(fjz0,tz);
330
331             }
332
333             /**************************
334              * CALCULATE INTERACTIONS *
335              **************************/
336
337             if (gmx_mm_any_lt(rsq11,rcutoff2))
338             {
339
340             /* REACTION-FIELD ELECTROSTATICS */
341             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_add_pd(rinv11,_mm_mul_pd(krf,rsq11)),crf));
342             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
343
344             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
345
346             /* Update potential sum for this i atom from the interaction with this j atom. */
347             velec            = _mm_and_pd(velec,cutoff_mask);
348             velecsum         = _mm_add_pd(velecsum,velec);
349
350             fscal            = felec;
351
352             fscal            = _mm_and_pd(fscal,cutoff_mask);
353
354             /* Calculate temporary vectorial force */
355             tx               = _mm_mul_pd(fscal,dx11);
356             ty               = _mm_mul_pd(fscal,dy11);
357             tz               = _mm_mul_pd(fscal,dz11);
358
359             /* Update vectorial force */
360             fix1             = _mm_add_pd(fix1,tx);
361             fiy1             = _mm_add_pd(fiy1,ty);
362             fiz1             = _mm_add_pd(fiz1,tz);
363
364             fjx1             = _mm_add_pd(fjx1,tx);
365             fjy1             = _mm_add_pd(fjy1,ty);
366             fjz1             = _mm_add_pd(fjz1,tz);
367
368             }
369
370             /**************************
371              * CALCULATE INTERACTIONS *
372              **************************/
373
374             if (gmx_mm_any_lt(rsq12,rcutoff2))
375             {
376
377             /* REACTION-FIELD ELECTROSTATICS */
378             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_add_pd(rinv12,_mm_mul_pd(krf,rsq12)),crf));
379             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
380
381             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
382
383             /* Update potential sum for this i atom from the interaction with this j atom. */
384             velec            = _mm_and_pd(velec,cutoff_mask);
385             velecsum         = _mm_add_pd(velecsum,velec);
386
387             fscal            = felec;
388
389             fscal            = _mm_and_pd(fscal,cutoff_mask);
390
391             /* Calculate temporary vectorial force */
392             tx               = _mm_mul_pd(fscal,dx12);
393             ty               = _mm_mul_pd(fscal,dy12);
394             tz               = _mm_mul_pd(fscal,dz12);
395
396             /* Update vectorial force */
397             fix1             = _mm_add_pd(fix1,tx);
398             fiy1             = _mm_add_pd(fiy1,ty);
399             fiz1             = _mm_add_pd(fiz1,tz);
400
401             fjx2             = _mm_add_pd(fjx2,tx);
402             fjy2             = _mm_add_pd(fjy2,ty);
403             fjz2             = _mm_add_pd(fjz2,tz);
404
405             }
406
407             /**************************
408              * CALCULATE INTERACTIONS *
409              **************************/
410
411             if (gmx_mm_any_lt(rsq13,rcutoff2))
412             {
413
414             /* REACTION-FIELD ELECTROSTATICS */
415             velec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_add_pd(rinv13,_mm_mul_pd(krf,rsq13)),crf));
416             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
417
418             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
419
420             /* Update potential sum for this i atom from the interaction with this j atom. */
421             velec            = _mm_and_pd(velec,cutoff_mask);
422             velecsum         = _mm_add_pd(velecsum,velec);
423
424             fscal            = felec;
425
426             fscal            = _mm_and_pd(fscal,cutoff_mask);
427
428             /* Calculate temporary vectorial force */
429             tx               = _mm_mul_pd(fscal,dx13);
430             ty               = _mm_mul_pd(fscal,dy13);
431             tz               = _mm_mul_pd(fscal,dz13);
432
433             /* Update vectorial force */
434             fix1             = _mm_add_pd(fix1,tx);
435             fiy1             = _mm_add_pd(fiy1,ty);
436             fiz1             = _mm_add_pd(fiz1,tz);
437
438             fjx3             = _mm_add_pd(fjx3,tx);
439             fjy3             = _mm_add_pd(fjy3,ty);
440             fjz3             = _mm_add_pd(fjz3,tz);
441
442             }
443
444             /**************************
445              * CALCULATE INTERACTIONS *
446              **************************/
447
448             if (gmx_mm_any_lt(rsq21,rcutoff2))
449             {
450
451             /* REACTION-FIELD ELECTROSTATICS */
452             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_add_pd(rinv21,_mm_mul_pd(krf,rsq21)),crf));
453             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
454
455             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
456
457             /* Update potential sum for this i atom from the interaction with this j atom. */
458             velec            = _mm_and_pd(velec,cutoff_mask);
459             velecsum         = _mm_add_pd(velecsum,velec);
460
461             fscal            = felec;
462
463             fscal            = _mm_and_pd(fscal,cutoff_mask);
464
465             /* Calculate temporary vectorial force */
466             tx               = _mm_mul_pd(fscal,dx21);
467             ty               = _mm_mul_pd(fscal,dy21);
468             tz               = _mm_mul_pd(fscal,dz21);
469
470             /* Update vectorial force */
471             fix2             = _mm_add_pd(fix2,tx);
472             fiy2             = _mm_add_pd(fiy2,ty);
473             fiz2             = _mm_add_pd(fiz2,tz);
474
475             fjx1             = _mm_add_pd(fjx1,tx);
476             fjy1             = _mm_add_pd(fjy1,ty);
477             fjz1             = _mm_add_pd(fjz1,tz);
478
479             }
480
481             /**************************
482              * CALCULATE INTERACTIONS *
483              **************************/
484
485             if (gmx_mm_any_lt(rsq22,rcutoff2))
486             {
487
488             /* REACTION-FIELD ELECTROSTATICS */
489             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_add_pd(rinv22,_mm_mul_pd(krf,rsq22)),crf));
490             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
491
492             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
493
494             /* Update potential sum for this i atom from the interaction with this j atom. */
495             velec            = _mm_and_pd(velec,cutoff_mask);
496             velecsum         = _mm_add_pd(velecsum,velec);
497
498             fscal            = felec;
499
500             fscal            = _mm_and_pd(fscal,cutoff_mask);
501
502             /* Calculate temporary vectorial force */
503             tx               = _mm_mul_pd(fscal,dx22);
504             ty               = _mm_mul_pd(fscal,dy22);
505             tz               = _mm_mul_pd(fscal,dz22);
506
507             /* Update vectorial force */
508             fix2             = _mm_add_pd(fix2,tx);
509             fiy2             = _mm_add_pd(fiy2,ty);
510             fiz2             = _mm_add_pd(fiz2,tz);
511
512             fjx2             = _mm_add_pd(fjx2,tx);
513             fjy2             = _mm_add_pd(fjy2,ty);
514             fjz2             = _mm_add_pd(fjz2,tz);
515
516             }
517
518             /**************************
519              * CALCULATE INTERACTIONS *
520              **************************/
521
522             if (gmx_mm_any_lt(rsq23,rcutoff2))
523             {
524
525             /* REACTION-FIELD ELECTROSTATICS */
526             velec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_add_pd(rinv23,_mm_mul_pd(krf,rsq23)),crf));
527             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
528
529             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
530
531             /* Update potential sum for this i atom from the interaction with this j atom. */
532             velec            = _mm_and_pd(velec,cutoff_mask);
533             velecsum         = _mm_add_pd(velecsum,velec);
534
535             fscal            = felec;
536
537             fscal            = _mm_and_pd(fscal,cutoff_mask);
538
539             /* Calculate temporary vectorial force */
540             tx               = _mm_mul_pd(fscal,dx23);
541             ty               = _mm_mul_pd(fscal,dy23);
542             tz               = _mm_mul_pd(fscal,dz23);
543
544             /* Update vectorial force */
545             fix2             = _mm_add_pd(fix2,tx);
546             fiy2             = _mm_add_pd(fiy2,ty);
547             fiz2             = _mm_add_pd(fiz2,tz);
548
549             fjx3             = _mm_add_pd(fjx3,tx);
550             fjy3             = _mm_add_pd(fjy3,ty);
551             fjz3             = _mm_add_pd(fjz3,tz);
552
553             }
554
555             /**************************
556              * CALCULATE INTERACTIONS *
557              **************************/
558
559             if (gmx_mm_any_lt(rsq31,rcutoff2))
560             {
561
562             /* REACTION-FIELD ELECTROSTATICS */
563             velec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_add_pd(rinv31,_mm_mul_pd(krf,rsq31)),crf));
564             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
565
566             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
567
568             /* Update potential sum for this i atom from the interaction with this j atom. */
569             velec            = _mm_and_pd(velec,cutoff_mask);
570             velecsum         = _mm_add_pd(velecsum,velec);
571
572             fscal            = felec;
573
574             fscal            = _mm_and_pd(fscal,cutoff_mask);
575
576             /* Calculate temporary vectorial force */
577             tx               = _mm_mul_pd(fscal,dx31);
578             ty               = _mm_mul_pd(fscal,dy31);
579             tz               = _mm_mul_pd(fscal,dz31);
580
581             /* Update vectorial force */
582             fix3             = _mm_add_pd(fix3,tx);
583             fiy3             = _mm_add_pd(fiy3,ty);
584             fiz3             = _mm_add_pd(fiz3,tz);
585
586             fjx1             = _mm_add_pd(fjx1,tx);
587             fjy1             = _mm_add_pd(fjy1,ty);
588             fjz1             = _mm_add_pd(fjz1,tz);
589
590             }
591
592             /**************************
593              * CALCULATE INTERACTIONS *
594              **************************/
595
596             if (gmx_mm_any_lt(rsq32,rcutoff2))
597             {
598
599             /* REACTION-FIELD ELECTROSTATICS */
600             velec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_add_pd(rinv32,_mm_mul_pd(krf,rsq32)),crf));
601             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
602
603             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
604
605             /* Update potential sum for this i atom from the interaction with this j atom. */
606             velec            = _mm_and_pd(velec,cutoff_mask);
607             velecsum         = _mm_add_pd(velecsum,velec);
608
609             fscal            = felec;
610
611             fscal            = _mm_and_pd(fscal,cutoff_mask);
612
613             /* Calculate temporary vectorial force */
614             tx               = _mm_mul_pd(fscal,dx32);
615             ty               = _mm_mul_pd(fscal,dy32);
616             tz               = _mm_mul_pd(fscal,dz32);
617
618             /* Update vectorial force */
619             fix3             = _mm_add_pd(fix3,tx);
620             fiy3             = _mm_add_pd(fiy3,ty);
621             fiz3             = _mm_add_pd(fiz3,tz);
622
623             fjx2             = _mm_add_pd(fjx2,tx);
624             fjy2             = _mm_add_pd(fjy2,ty);
625             fjz2             = _mm_add_pd(fjz2,tz);
626
627             }
628
629             /**************************
630              * CALCULATE INTERACTIONS *
631              **************************/
632
633             if (gmx_mm_any_lt(rsq33,rcutoff2))
634             {
635
636             /* REACTION-FIELD ELECTROSTATICS */
637             velec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_add_pd(rinv33,_mm_mul_pd(krf,rsq33)),crf));
638             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
639
640             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
641
642             /* Update potential sum for this i atom from the interaction with this j atom. */
643             velec            = _mm_and_pd(velec,cutoff_mask);
644             velecsum         = _mm_add_pd(velecsum,velec);
645
646             fscal            = felec;
647
648             fscal            = _mm_and_pd(fscal,cutoff_mask);
649
650             /* Calculate temporary vectorial force */
651             tx               = _mm_mul_pd(fscal,dx33);
652             ty               = _mm_mul_pd(fscal,dy33);
653             tz               = _mm_mul_pd(fscal,dz33);
654
655             /* Update vectorial force */
656             fix3             = _mm_add_pd(fix3,tx);
657             fiy3             = _mm_add_pd(fiy3,ty);
658             fiz3             = _mm_add_pd(fiz3,tz);
659
660             fjx3             = _mm_add_pd(fjx3,tx);
661             fjy3             = _mm_add_pd(fjy3,ty);
662             fjz3             = _mm_add_pd(fjz3,tz);
663
664             }
665
666             gmx_mm_decrement_4rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
667
668             /* Inner loop uses 368 flops */
669         }
670
671         if(jidx<j_index_end)
672         {
673
674             jnrA             = jjnr[jidx];
675             j_coord_offsetA  = DIM*jnrA;
676
677             /* load j atom coordinates */
678             gmx_mm_load_4rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
679                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
680                                               &jy2,&jz2,&jx3,&jy3,&jz3);
681
682             /* Calculate displacement vector */
683             dx00             = _mm_sub_pd(ix0,jx0);
684             dy00             = _mm_sub_pd(iy0,jy0);
685             dz00             = _mm_sub_pd(iz0,jz0);
686             dx11             = _mm_sub_pd(ix1,jx1);
687             dy11             = _mm_sub_pd(iy1,jy1);
688             dz11             = _mm_sub_pd(iz1,jz1);
689             dx12             = _mm_sub_pd(ix1,jx2);
690             dy12             = _mm_sub_pd(iy1,jy2);
691             dz12             = _mm_sub_pd(iz1,jz2);
692             dx13             = _mm_sub_pd(ix1,jx3);
693             dy13             = _mm_sub_pd(iy1,jy3);
694             dz13             = _mm_sub_pd(iz1,jz3);
695             dx21             = _mm_sub_pd(ix2,jx1);
696             dy21             = _mm_sub_pd(iy2,jy1);
697             dz21             = _mm_sub_pd(iz2,jz1);
698             dx22             = _mm_sub_pd(ix2,jx2);
699             dy22             = _mm_sub_pd(iy2,jy2);
700             dz22             = _mm_sub_pd(iz2,jz2);
701             dx23             = _mm_sub_pd(ix2,jx3);
702             dy23             = _mm_sub_pd(iy2,jy3);
703             dz23             = _mm_sub_pd(iz2,jz3);
704             dx31             = _mm_sub_pd(ix3,jx1);
705             dy31             = _mm_sub_pd(iy3,jy1);
706             dz31             = _mm_sub_pd(iz3,jz1);
707             dx32             = _mm_sub_pd(ix3,jx2);
708             dy32             = _mm_sub_pd(iy3,jy2);
709             dz32             = _mm_sub_pd(iz3,jz2);
710             dx33             = _mm_sub_pd(ix3,jx3);
711             dy33             = _mm_sub_pd(iy3,jy3);
712             dz33             = _mm_sub_pd(iz3,jz3);
713
714             /* Calculate squared distance and things based on it */
715             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
716             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
717             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
718             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
719             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
720             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
721             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
722             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
723             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
724             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
725
726             rinv11           = gmx_mm_invsqrt_pd(rsq11);
727             rinv12           = gmx_mm_invsqrt_pd(rsq12);
728             rinv13           = gmx_mm_invsqrt_pd(rsq13);
729             rinv21           = gmx_mm_invsqrt_pd(rsq21);
730             rinv22           = gmx_mm_invsqrt_pd(rsq22);
731             rinv23           = gmx_mm_invsqrt_pd(rsq23);
732             rinv31           = gmx_mm_invsqrt_pd(rsq31);
733             rinv32           = gmx_mm_invsqrt_pd(rsq32);
734             rinv33           = gmx_mm_invsqrt_pd(rsq33);
735
736             rinvsq00         = gmx_mm_inv_pd(rsq00);
737             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
738             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
739             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
740             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
741             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
742             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
743             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
744             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
745             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
746
747             fjx0             = _mm_setzero_pd();
748             fjy0             = _mm_setzero_pd();
749             fjz0             = _mm_setzero_pd();
750             fjx1             = _mm_setzero_pd();
751             fjy1             = _mm_setzero_pd();
752             fjz1             = _mm_setzero_pd();
753             fjx2             = _mm_setzero_pd();
754             fjy2             = _mm_setzero_pd();
755             fjz2             = _mm_setzero_pd();
756             fjx3             = _mm_setzero_pd();
757             fjy3             = _mm_setzero_pd();
758             fjz3             = _mm_setzero_pd();
759
760             /**************************
761              * CALCULATE INTERACTIONS *
762              **************************/
763
764             if (gmx_mm_any_lt(rsq00,rcutoff2))
765             {
766
767             /* LENNARD-JONES DISPERSION/REPULSION */
768
769             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
770             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
771             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
772             vvdw             = _mm_sub_pd(_mm_mul_pd( _mm_sub_pd(vvdw12 , _mm_mul_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6))), one_twelfth) ,
773                                           _mm_mul_pd( _mm_sub_pd(vvdw6,_mm_mul_pd(c6_00,sh_vdw_invrcut6)),one_sixth));
774             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
775
776             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
777
778             /* Update potential sum for this i atom from the interaction with this j atom. */
779             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
780             vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
781             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
782
783             fscal            = fvdw;
784
785             fscal            = _mm_and_pd(fscal,cutoff_mask);
786
787             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
788
789             /* Calculate temporary vectorial force */
790             tx               = _mm_mul_pd(fscal,dx00);
791             ty               = _mm_mul_pd(fscal,dy00);
792             tz               = _mm_mul_pd(fscal,dz00);
793
794             /* Update vectorial force */
795             fix0             = _mm_add_pd(fix0,tx);
796             fiy0             = _mm_add_pd(fiy0,ty);
797             fiz0             = _mm_add_pd(fiz0,tz);
798
799             fjx0             = _mm_add_pd(fjx0,tx);
800             fjy0             = _mm_add_pd(fjy0,ty);
801             fjz0             = _mm_add_pd(fjz0,tz);
802
803             }
804
805             /**************************
806              * CALCULATE INTERACTIONS *
807              **************************/
808
809             if (gmx_mm_any_lt(rsq11,rcutoff2))
810             {
811
812             /* REACTION-FIELD ELECTROSTATICS */
813             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_add_pd(rinv11,_mm_mul_pd(krf,rsq11)),crf));
814             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
815
816             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
817
818             /* Update potential sum for this i atom from the interaction with this j atom. */
819             velec            = _mm_and_pd(velec,cutoff_mask);
820             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
821             velecsum         = _mm_add_pd(velecsum,velec);
822
823             fscal            = felec;
824
825             fscal            = _mm_and_pd(fscal,cutoff_mask);
826
827             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
828
829             /* Calculate temporary vectorial force */
830             tx               = _mm_mul_pd(fscal,dx11);
831             ty               = _mm_mul_pd(fscal,dy11);
832             tz               = _mm_mul_pd(fscal,dz11);
833
834             /* Update vectorial force */
835             fix1             = _mm_add_pd(fix1,tx);
836             fiy1             = _mm_add_pd(fiy1,ty);
837             fiz1             = _mm_add_pd(fiz1,tz);
838
839             fjx1             = _mm_add_pd(fjx1,tx);
840             fjy1             = _mm_add_pd(fjy1,ty);
841             fjz1             = _mm_add_pd(fjz1,tz);
842
843             }
844
845             /**************************
846              * CALCULATE INTERACTIONS *
847              **************************/
848
849             if (gmx_mm_any_lt(rsq12,rcutoff2))
850             {
851
852             /* REACTION-FIELD ELECTROSTATICS */
853             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_add_pd(rinv12,_mm_mul_pd(krf,rsq12)),crf));
854             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
855
856             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
857
858             /* Update potential sum for this i atom from the interaction with this j atom. */
859             velec            = _mm_and_pd(velec,cutoff_mask);
860             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
861             velecsum         = _mm_add_pd(velecsum,velec);
862
863             fscal            = felec;
864
865             fscal            = _mm_and_pd(fscal,cutoff_mask);
866
867             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
868
869             /* Calculate temporary vectorial force */
870             tx               = _mm_mul_pd(fscal,dx12);
871             ty               = _mm_mul_pd(fscal,dy12);
872             tz               = _mm_mul_pd(fscal,dz12);
873
874             /* Update vectorial force */
875             fix1             = _mm_add_pd(fix1,tx);
876             fiy1             = _mm_add_pd(fiy1,ty);
877             fiz1             = _mm_add_pd(fiz1,tz);
878
879             fjx2             = _mm_add_pd(fjx2,tx);
880             fjy2             = _mm_add_pd(fjy2,ty);
881             fjz2             = _mm_add_pd(fjz2,tz);
882
883             }
884
885             /**************************
886              * CALCULATE INTERACTIONS *
887              **************************/
888
889             if (gmx_mm_any_lt(rsq13,rcutoff2))
890             {
891
892             /* REACTION-FIELD ELECTROSTATICS */
893             velec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_add_pd(rinv13,_mm_mul_pd(krf,rsq13)),crf));
894             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
895
896             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
897
898             /* Update potential sum for this i atom from the interaction with this j atom. */
899             velec            = _mm_and_pd(velec,cutoff_mask);
900             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
901             velecsum         = _mm_add_pd(velecsum,velec);
902
903             fscal            = felec;
904
905             fscal            = _mm_and_pd(fscal,cutoff_mask);
906
907             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
908
909             /* Calculate temporary vectorial force */
910             tx               = _mm_mul_pd(fscal,dx13);
911             ty               = _mm_mul_pd(fscal,dy13);
912             tz               = _mm_mul_pd(fscal,dz13);
913
914             /* Update vectorial force */
915             fix1             = _mm_add_pd(fix1,tx);
916             fiy1             = _mm_add_pd(fiy1,ty);
917             fiz1             = _mm_add_pd(fiz1,tz);
918
919             fjx3             = _mm_add_pd(fjx3,tx);
920             fjy3             = _mm_add_pd(fjy3,ty);
921             fjz3             = _mm_add_pd(fjz3,tz);
922
923             }
924
925             /**************************
926              * CALCULATE INTERACTIONS *
927              **************************/
928
929             if (gmx_mm_any_lt(rsq21,rcutoff2))
930             {
931
932             /* REACTION-FIELD ELECTROSTATICS */
933             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_add_pd(rinv21,_mm_mul_pd(krf,rsq21)),crf));
934             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
935
936             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
937
938             /* Update potential sum for this i atom from the interaction with this j atom. */
939             velec            = _mm_and_pd(velec,cutoff_mask);
940             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
941             velecsum         = _mm_add_pd(velecsum,velec);
942
943             fscal            = felec;
944
945             fscal            = _mm_and_pd(fscal,cutoff_mask);
946
947             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
948
949             /* Calculate temporary vectorial force */
950             tx               = _mm_mul_pd(fscal,dx21);
951             ty               = _mm_mul_pd(fscal,dy21);
952             tz               = _mm_mul_pd(fscal,dz21);
953
954             /* Update vectorial force */
955             fix2             = _mm_add_pd(fix2,tx);
956             fiy2             = _mm_add_pd(fiy2,ty);
957             fiz2             = _mm_add_pd(fiz2,tz);
958
959             fjx1             = _mm_add_pd(fjx1,tx);
960             fjy1             = _mm_add_pd(fjy1,ty);
961             fjz1             = _mm_add_pd(fjz1,tz);
962
963             }
964
965             /**************************
966              * CALCULATE INTERACTIONS *
967              **************************/
968
969             if (gmx_mm_any_lt(rsq22,rcutoff2))
970             {
971
972             /* REACTION-FIELD ELECTROSTATICS */
973             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_add_pd(rinv22,_mm_mul_pd(krf,rsq22)),crf));
974             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
975
976             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
977
978             /* Update potential sum for this i atom from the interaction with this j atom. */
979             velec            = _mm_and_pd(velec,cutoff_mask);
980             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
981             velecsum         = _mm_add_pd(velecsum,velec);
982
983             fscal            = felec;
984
985             fscal            = _mm_and_pd(fscal,cutoff_mask);
986
987             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
988
989             /* Calculate temporary vectorial force */
990             tx               = _mm_mul_pd(fscal,dx22);
991             ty               = _mm_mul_pd(fscal,dy22);
992             tz               = _mm_mul_pd(fscal,dz22);
993
994             /* Update vectorial force */
995             fix2             = _mm_add_pd(fix2,tx);
996             fiy2             = _mm_add_pd(fiy2,ty);
997             fiz2             = _mm_add_pd(fiz2,tz);
998
999             fjx2             = _mm_add_pd(fjx2,tx);
1000             fjy2             = _mm_add_pd(fjy2,ty);
1001             fjz2             = _mm_add_pd(fjz2,tz);
1002
1003             }
1004
1005             /**************************
1006              * CALCULATE INTERACTIONS *
1007              **************************/
1008
1009             if (gmx_mm_any_lt(rsq23,rcutoff2))
1010             {
1011
1012             /* REACTION-FIELD ELECTROSTATICS */
1013             velec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_add_pd(rinv23,_mm_mul_pd(krf,rsq23)),crf));
1014             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
1015
1016             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
1017
1018             /* Update potential sum for this i atom from the interaction with this j atom. */
1019             velec            = _mm_and_pd(velec,cutoff_mask);
1020             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1021             velecsum         = _mm_add_pd(velecsum,velec);
1022
1023             fscal            = felec;
1024
1025             fscal            = _mm_and_pd(fscal,cutoff_mask);
1026
1027             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1028
1029             /* Calculate temporary vectorial force */
1030             tx               = _mm_mul_pd(fscal,dx23);
1031             ty               = _mm_mul_pd(fscal,dy23);
1032             tz               = _mm_mul_pd(fscal,dz23);
1033
1034             /* Update vectorial force */
1035             fix2             = _mm_add_pd(fix2,tx);
1036             fiy2             = _mm_add_pd(fiy2,ty);
1037             fiz2             = _mm_add_pd(fiz2,tz);
1038
1039             fjx3             = _mm_add_pd(fjx3,tx);
1040             fjy3             = _mm_add_pd(fjy3,ty);
1041             fjz3             = _mm_add_pd(fjz3,tz);
1042
1043             }
1044
1045             /**************************
1046              * CALCULATE INTERACTIONS *
1047              **************************/
1048
1049             if (gmx_mm_any_lt(rsq31,rcutoff2))
1050             {
1051
1052             /* REACTION-FIELD ELECTROSTATICS */
1053             velec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_add_pd(rinv31,_mm_mul_pd(krf,rsq31)),crf));
1054             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
1055
1056             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
1057
1058             /* Update potential sum for this i atom from the interaction with this j atom. */
1059             velec            = _mm_and_pd(velec,cutoff_mask);
1060             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1061             velecsum         = _mm_add_pd(velecsum,velec);
1062
1063             fscal            = felec;
1064
1065             fscal            = _mm_and_pd(fscal,cutoff_mask);
1066
1067             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1068
1069             /* Calculate temporary vectorial force */
1070             tx               = _mm_mul_pd(fscal,dx31);
1071             ty               = _mm_mul_pd(fscal,dy31);
1072             tz               = _mm_mul_pd(fscal,dz31);
1073
1074             /* Update vectorial force */
1075             fix3             = _mm_add_pd(fix3,tx);
1076             fiy3             = _mm_add_pd(fiy3,ty);
1077             fiz3             = _mm_add_pd(fiz3,tz);
1078
1079             fjx1             = _mm_add_pd(fjx1,tx);
1080             fjy1             = _mm_add_pd(fjy1,ty);
1081             fjz1             = _mm_add_pd(fjz1,tz);
1082
1083             }
1084
1085             /**************************
1086              * CALCULATE INTERACTIONS *
1087              **************************/
1088
1089             if (gmx_mm_any_lt(rsq32,rcutoff2))
1090             {
1091
1092             /* REACTION-FIELD ELECTROSTATICS */
1093             velec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_add_pd(rinv32,_mm_mul_pd(krf,rsq32)),crf));
1094             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
1095
1096             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1097
1098             /* Update potential sum for this i atom from the interaction with this j atom. */
1099             velec            = _mm_and_pd(velec,cutoff_mask);
1100             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1101             velecsum         = _mm_add_pd(velecsum,velec);
1102
1103             fscal            = felec;
1104
1105             fscal            = _mm_and_pd(fscal,cutoff_mask);
1106
1107             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1108
1109             /* Calculate temporary vectorial force */
1110             tx               = _mm_mul_pd(fscal,dx32);
1111             ty               = _mm_mul_pd(fscal,dy32);
1112             tz               = _mm_mul_pd(fscal,dz32);
1113
1114             /* Update vectorial force */
1115             fix3             = _mm_add_pd(fix3,tx);
1116             fiy3             = _mm_add_pd(fiy3,ty);
1117             fiz3             = _mm_add_pd(fiz3,tz);
1118
1119             fjx2             = _mm_add_pd(fjx2,tx);
1120             fjy2             = _mm_add_pd(fjy2,ty);
1121             fjz2             = _mm_add_pd(fjz2,tz);
1122
1123             }
1124
1125             /**************************
1126              * CALCULATE INTERACTIONS *
1127              **************************/
1128
1129             if (gmx_mm_any_lt(rsq33,rcutoff2))
1130             {
1131
1132             /* REACTION-FIELD ELECTROSTATICS */
1133             velec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_add_pd(rinv33,_mm_mul_pd(krf,rsq33)),crf));
1134             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
1135
1136             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1137
1138             /* Update potential sum for this i atom from the interaction with this j atom. */
1139             velec            = _mm_and_pd(velec,cutoff_mask);
1140             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1141             velecsum         = _mm_add_pd(velecsum,velec);
1142
1143             fscal            = felec;
1144
1145             fscal            = _mm_and_pd(fscal,cutoff_mask);
1146
1147             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1148
1149             /* Calculate temporary vectorial force */
1150             tx               = _mm_mul_pd(fscal,dx33);
1151             ty               = _mm_mul_pd(fscal,dy33);
1152             tz               = _mm_mul_pd(fscal,dz33);
1153
1154             /* Update vectorial force */
1155             fix3             = _mm_add_pd(fix3,tx);
1156             fiy3             = _mm_add_pd(fiy3,ty);
1157             fiz3             = _mm_add_pd(fiz3,tz);
1158
1159             fjx3             = _mm_add_pd(fjx3,tx);
1160             fjy3             = _mm_add_pd(fjy3,ty);
1161             fjz3             = _mm_add_pd(fjz3,tz);
1162
1163             }
1164
1165             gmx_mm_decrement_4rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1166
1167             /* Inner loop uses 368 flops */
1168         }
1169
1170         /* End of innermost loop */
1171
1172         gmx_mm_update_iforce_4atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1173                                               f+i_coord_offset,fshift+i_shift_offset);
1174
1175         ggid                        = gid[iidx];
1176         /* Update potential energies */
1177         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
1178         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
1179
1180         /* Increment number of inner iterations */
1181         inneriter                  += j_index_end - j_index_start;
1182
1183         /* Outer loop uses 26 flops */
1184     }
1185
1186     /* Increment number of outer iterations */
1187     outeriter        += nri;
1188
1189     /* Update outer/inner flops */
1190
1191     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*368);
1192 }
1193 /*
1194  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_double
1195  * Electrostatics interaction: ReactionField
1196  * VdW interaction:            LennardJones
1197  * Geometry:                   Water4-Water4
1198  * Calculate force/pot:        Force
1199  */
1200 void
1201 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse4_1_double
1202                     (t_nblist * gmx_restrict                nlist,
1203                      rvec * gmx_restrict                    xx,
1204                      rvec * gmx_restrict                    ff,
1205                      t_forcerec * gmx_restrict              fr,
1206                      t_mdatoms * gmx_restrict               mdatoms,
1207                      nb_kernel_data_t * gmx_restrict        kernel_data,
1208                      t_nrnb * gmx_restrict                  nrnb)
1209 {
1210     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1211      * just 0 for non-waters.
1212      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
1213      * jnr indices corresponding to data put in the four positions in the SIMD register.
1214      */
1215     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1216     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1217     int              jnrA,jnrB;
1218     int              j_coord_offsetA,j_coord_offsetB;
1219     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1220     real             rcutoff_scalar;
1221     real             *shiftvec,*fshift,*x,*f;
1222     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1223     int              vdwioffset0;
1224     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1225     int              vdwioffset1;
1226     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1227     int              vdwioffset2;
1228     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1229     int              vdwioffset3;
1230     __m128d          ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1231     int              vdwjidx0A,vdwjidx0B;
1232     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1233     int              vdwjidx1A,vdwjidx1B;
1234     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1235     int              vdwjidx2A,vdwjidx2B;
1236     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1237     int              vdwjidx3A,vdwjidx3B;
1238     __m128d          jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1239     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1240     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1241     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1242     __m128d          dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1243     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1244     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1245     __m128d          dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1246     __m128d          dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1247     __m128d          dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1248     __m128d          dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1249     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
1250     real             *charge;
1251     int              nvdwtype;
1252     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1253     int              *vdwtype;
1254     real             *vdwparam;
1255     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
1256     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
1257     __m128d          dummy_mask,cutoff_mask;
1258     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
1259     __m128d          one     = _mm_set1_pd(1.0);
1260     __m128d          two     = _mm_set1_pd(2.0);
1261     x                = xx[0];
1262     f                = ff[0];
1263
1264     nri              = nlist->nri;
1265     iinr             = nlist->iinr;
1266     jindex           = nlist->jindex;
1267     jjnr             = nlist->jjnr;
1268     shiftidx         = nlist->shift;
1269     gid              = nlist->gid;
1270     shiftvec         = fr->shift_vec[0];
1271     fshift           = fr->fshift[0];
1272     facel            = _mm_set1_pd(fr->epsfac);
1273     charge           = mdatoms->chargeA;
1274     krf              = _mm_set1_pd(fr->ic->k_rf);
1275     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
1276     crf              = _mm_set1_pd(fr->ic->c_rf);
1277     nvdwtype         = fr->ntype;
1278     vdwparam         = fr->nbfp;
1279     vdwtype          = mdatoms->typeA;
1280
1281     /* Setup water-specific parameters */
1282     inr              = nlist->iinr[0];
1283     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
1284     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
1285     iq3              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+3]));
1286     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1287
1288     jq1              = _mm_set1_pd(charge[inr+1]);
1289     jq2              = _mm_set1_pd(charge[inr+2]);
1290     jq3              = _mm_set1_pd(charge[inr+3]);
1291     vdwjidx0A        = 2*vdwtype[inr+0];
1292     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
1293     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
1294     qq11             = _mm_mul_pd(iq1,jq1);
1295     qq12             = _mm_mul_pd(iq1,jq2);
1296     qq13             = _mm_mul_pd(iq1,jq3);
1297     qq21             = _mm_mul_pd(iq2,jq1);
1298     qq22             = _mm_mul_pd(iq2,jq2);
1299     qq23             = _mm_mul_pd(iq2,jq3);
1300     qq31             = _mm_mul_pd(iq3,jq1);
1301     qq32             = _mm_mul_pd(iq3,jq2);
1302     qq33             = _mm_mul_pd(iq3,jq3);
1303
1304     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1305     rcutoff_scalar   = fr->rcoulomb;
1306     rcutoff          = _mm_set1_pd(rcutoff_scalar);
1307     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
1308
1309     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
1310     rvdw             = _mm_set1_pd(fr->rvdw);
1311
1312     /* Avoid stupid compiler warnings */
1313     jnrA = jnrB = 0;
1314     j_coord_offsetA = 0;
1315     j_coord_offsetB = 0;
1316
1317     outeriter        = 0;
1318     inneriter        = 0;
1319
1320     /* Start outer loop over neighborlists */
1321     for(iidx=0; iidx<nri; iidx++)
1322     {
1323         /* Load shift vector for this list */
1324         i_shift_offset   = DIM*shiftidx[iidx];
1325
1326         /* Load limits for loop over neighbors */
1327         j_index_start    = jindex[iidx];
1328         j_index_end      = jindex[iidx+1];
1329
1330         /* Get outer coordinate index */
1331         inr              = iinr[iidx];
1332         i_coord_offset   = DIM*inr;
1333
1334         /* Load i particle coords and add shift vector */
1335         gmx_mm_load_shift_and_4rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
1336                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1337
1338         fix0             = _mm_setzero_pd();
1339         fiy0             = _mm_setzero_pd();
1340         fiz0             = _mm_setzero_pd();
1341         fix1             = _mm_setzero_pd();
1342         fiy1             = _mm_setzero_pd();
1343         fiz1             = _mm_setzero_pd();
1344         fix2             = _mm_setzero_pd();
1345         fiy2             = _mm_setzero_pd();
1346         fiz2             = _mm_setzero_pd();
1347         fix3             = _mm_setzero_pd();
1348         fiy3             = _mm_setzero_pd();
1349         fiz3             = _mm_setzero_pd();
1350
1351         /* Start inner kernel loop */
1352         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1353         {
1354
1355             /* Get j neighbor index, and coordinate index */
1356             jnrA             = jjnr[jidx];
1357             jnrB             = jjnr[jidx+1];
1358             j_coord_offsetA  = DIM*jnrA;
1359             j_coord_offsetB  = DIM*jnrB;
1360
1361             /* load j atom coordinates */
1362             gmx_mm_load_4rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
1363                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1364                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1365
1366             /* Calculate displacement vector */
1367             dx00             = _mm_sub_pd(ix0,jx0);
1368             dy00             = _mm_sub_pd(iy0,jy0);
1369             dz00             = _mm_sub_pd(iz0,jz0);
1370             dx11             = _mm_sub_pd(ix1,jx1);
1371             dy11             = _mm_sub_pd(iy1,jy1);
1372             dz11             = _mm_sub_pd(iz1,jz1);
1373             dx12             = _mm_sub_pd(ix1,jx2);
1374             dy12             = _mm_sub_pd(iy1,jy2);
1375             dz12             = _mm_sub_pd(iz1,jz2);
1376             dx13             = _mm_sub_pd(ix1,jx3);
1377             dy13             = _mm_sub_pd(iy1,jy3);
1378             dz13             = _mm_sub_pd(iz1,jz3);
1379             dx21             = _mm_sub_pd(ix2,jx1);
1380             dy21             = _mm_sub_pd(iy2,jy1);
1381             dz21             = _mm_sub_pd(iz2,jz1);
1382             dx22             = _mm_sub_pd(ix2,jx2);
1383             dy22             = _mm_sub_pd(iy2,jy2);
1384             dz22             = _mm_sub_pd(iz2,jz2);
1385             dx23             = _mm_sub_pd(ix2,jx3);
1386             dy23             = _mm_sub_pd(iy2,jy3);
1387             dz23             = _mm_sub_pd(iz2,jz3);
1388             dx31             = _mm_sub_pd(ix3,jx1);
1389             dy31             = _mm_sub_pd(iy3,jy1);
1390             dz31             = _mm_sub_pd(iz3,jz1);
1391             dx32             = _mm_sub_pd(ix3,jx2);
1392             dy32             = _mm_sub_pd(iy3,jy2);
1393             dz32             = _mm_sub_pd(iz3,jz2);
1394             dx33             = _mm_sub_pd(ix3,jx3);
1395             dy33             = _mm_sub_pd(iy3,jy3);
1396             dz33             = _mm_sub_pd(iz3,jz3);
1397
1398             /* Calculate squared distance and things based on it */
1399             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1400             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1401             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1402             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
1403             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1404             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1405             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
1406             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
1407             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
1408             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
1409
1410             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1411             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1412             rinv13           = gmx_mm_invsqrt_pd(rsq13);
1413             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1414             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1415             rinv23           = gmx_mm_invsqrt_pd(rsq23);
1416             rinv31           = gmx_mm_invsqrt_pd(rsq31);
1417             rinv32           = gmx_mm_invsqrt_pd(rsq32);
1418             rinv33           = gmx_mm_invsqrt_pd(rsq33);
1419
1420             rinvsq00         = gmx_mm_inv_pd(rsq00);
1421             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1422             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1423             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
1424             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1425             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1426             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
1427             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
1428             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
1429             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
1430
1431             fjx0             = _mm_setzero_pd();
1432             fjy0             = _mm_setzero_pd();
1433             fjz0             = _mm_setzero_pd();
1434             fjx1             = _mm_setzero_pd();
1435             fjy1             = _mm_setzero_pd();
1436             fjz1             = _mm_setzero_pd();
1437             fjx2             = _mm_setzero_pd();
1438             fjy2             = _mm_setzero_pd();
1439             fjz2             = _mm_setzero_pd();
1440             fjx3             = _mm_setzero_pd();
1441             fjy3             = _mm_setzero_pd();
1442             fjz3             = _mm_setzero_pd();
1443
1444             /**************************
1445              * CALCULATE INTERACTIONS *
1446              **************************/
1447
1448             if (gmx_mm_any_lt(rsq00,rcutoff2))
1449             {
1450
1451             /* LENNARD-JONES DISPERSION/REPULSION */
1452
1453             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1454             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1455
1456             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1457
1458             fscal            = fvdw;
1459
1460             fscal            = _mm_and_pd(fscal,cutoff_mask);
1461
1462             /* Calculate temporary vectorial force */
1463             tx               = _mm_mul_pd(fscal,dx00);
1464             ty               = _mm_mul_pd(fscal,dy00);
1465             tz               = _mm_mul_pd(fscal,dz00);
1466
1467             /* Update vectorial force */
1468             fix0             = _mm_add_pd(fix0,tx);
1469             fiy0             = _mm_add_pd(fiy0,ty);
1470             fiz0             = _mm_add_pd(fiz0,tz);
1471
1472             fjx0             = _mm_add_pd(fjx0,tx);
1473             fjy0             = _mm_add_pd(fjy0,ty);
1474             fjz0             = _mm_add_pd(fjz0,tz);
1475
1476             }
1477
1478             /**************************
1479              * CALCULATE INTERACTIONS *
1480              **************************/
1481
1482             if (gmx_mm_any_lt(rsq11,rcutoff2))
1483             {
1484
1485             /* REACTION-FIELD ELECTROSTATICS */
1486             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
1487
1488             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1489
1490             fscal            = felec;
1491
1492             fscal            = _mm_and_pd(fscal,cutoff_mask);
1493
1494             /* Calculate temporary vectorial force */
1495             tx               = _mm_mul_pd(fscal,dx11);
1496             ty               = _mm_mul_pd(fscal,dy11);
1497             tz               = _mm_mul_pd(fscal,dz11);
1498
1499             /* Update vectorial force */
1500             fix1             = _mm_add_pd(fix1,tx);
1501             fiy1             = _mm_add_pd(fiy1,ty);
1502             fiz1             = _mm_add_pd(fiz1,tz);
1503
1504             fjx1             = _mm_add_pd(fjx1,tx);
1505             fjy1             = _mm_add_pd(fjy1,ty);
1506             fjz1             = _mm_add_pd(fjz1,tz);
1507
1508             }
1509
1510             /**************************
1511              * CALCULATE INTERACTIONS *
1512              **************************/
1513
1514             if (gmx_mm_any_lt(rsq12,rcutoff2))
1515             {
1516
1517             /* REACTION-FIELD ELECTROSTATICS */
1518             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
1519
1520             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1521
1522             fscal            = felec;
1523
1524             fscal            = _mm_and_pd(fscal,cutoff_mask);
1525
1526             /* Calculate temporary vectorial force */
1527             tx               = _mm_mul_pd(fscal,dx12);
1528             ty               = _mm_mul_pd(fscal,dy12);
1529             tz               = _mm_mul_pd(fscal,dz12);
1530
1531             /* Update vectorial force */
1532             fix1             = _mm_add_pd(fix1,tx);
1533             fiy1             = _mm_add_pd(fiy1,ty);
1534             fiz1             = _mm_add_pd(fiz1,tz);
1535
1536             fjx2             = _mm_add_pd(fjx2,tx);
1537             fjy2             = _mm_add_pd(fjy2,ty);
1538             fjz2             = _mm_add_pd(fjz2,tz);
1539
1540             }
1541
1542             /**************************
1543              * CALCULATE INTERACTIONS *
1544              **************************/
1545
1546             if (gmx_mm_any_lt(rsq13,rcutoff2))
1547             {
1548
1549             /* REACTION-FIELD ELECTROSTATICS */
1550             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
1551
1552             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
1553
1554             fscal            = felec;
1555
1556             fscal            = _mm_and_pd(fscal,cutoff_mask);
1557
1558             /* Calculate temporary vectorial force */
1559             tx               = _mm_mul_pd(fscal,dx13);
1560             ty               = _mm_mul_pd(fscal,dy13);
1561             tz               = _mm_mul_pd(fscal,dz13);
1562
1563             /* Update vectorial force */
1564             fix1             = _mm_add_pd(fix1,tx);
1565             fiy1             = _mm_add_pd(fiy1,ty);
1566             fiz1             = _mm_add_pd(fiz1,tz);
1567
1568             fjx3             = _mm_add_pd(fjx3,tx);
1569             fjy3             = _mm_add_pd(fjy3,ty);
1570             fjz3             = _mm_add_pd(fjz3,tz);
1571
1572             }
1573
1574             /**************************
1575              * CALCULATE INTERACTIONS *
1576              **************************/
1577
1578             if (gmx_mm_any_lt(rsq21,rcutoff2))
1579             {
1580
1581             /* REACTION-FIELD ELECTROSTATICS */
1582             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
1583
1584             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1585
1586             fscal            = felec;
1587
1588             fscal            = _mm_and_pd(fscal,cutoff_mask);
1589
1590             /* Calculate temporary vectorial force */
1591             tx               = _mm_mul_pd(fscal,dx21);
1592             ty               = _mm_mul_pd(fscal,dy21);
1593             tz               = _mm_mul_pd(fscal,dz21);
1594
1595             /* Update vectorial force */
1596             fix2             = _mm_add_pd(fix2,tx);
1597             fiy2             = _mm_add_pd(fiy2,ty);
1598             fiz2             = _mm_add_pd(fiz2,tz);
1599
1600             fjx1             = _mm_add_pd(fjx1,tx);
1601             fjy1             = _mm_add_pd(fjy1,ty);
1602             fjz1             = _mm_add_pd(fjz1,tz);
1603
1604             }
1605
1606             /**************************
1607              * CALCULATE INTERACTIONS *
1608              **************************/
1609
1610             if (gmx_mm_any_lt(rsq22,rcutoff2))
1611             {
1612
1613             /* REACTION-FIELD ELECTROSTATICS */
1614             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
1615
1616             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
1617
1618             fscal            = felec;
1619
1620             fscal            = _mm_and_pd(fscal,cutoff_mask);
1621
1622             /* Calculate temporary vectorial force */
1623             tx               = _mm_mul_pd(fscal,dx22);
1624             ty               = _mm_mul_pd(fscal,dy22);
1625             tz               = _mm_mul_pd(fscal,dz22);
1626
1627             /* Update vectorial force */
1628             fix2             = _mm_add_pd(fix2,tx);
1629             fiy2             = _mm_add_pd(fiy2,ty);
1630             fiz2             = _mm_add_pd(fiz2,tz);
1631
1632             fjx2             = _mm_add_pd(fjx2,tx);
1633             fjy2             = _mm_add_pd(fjy2,ty);
1634             fjz2             = _mm_add_pd(fjz2,tz);
1635
1636             }
1637
1638             /**************************
1639              * CALCULATE INTERACTIONS *
1640              **************************/
1641
1642             if (gmx_mm_any_lt(rsq23,rcutoff2))
1643             {
1644
1645             /* REACTION-FIELD ELECTROSTATICS */
1646             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
1647
1648             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
1649
1650             fscal            = felec;
1651
1652             fscal            = _mm_and_pd(fscal,cutoff_mask);
1653
1654             /* Calculate temporary vectorial force */
1655             tx               = _mm_mul_pd(fscal,dx23);
1656             ty               = _mm_mul_pd(fscal,dy23);
1657             tz               = _mm_mul_pd(fscal,dz23);
1658
1659             /* Update vectorial force */
1660             fix2             = _mm_add_pd(fix2,tx);
1661             fiy2             = _mm_add_pd(fiy2,ty);
1662             fiz2             = _mm_add_pd(fiz2,tz);
1663
1664             fjx3             = _mm_add_pd(fjx3,tx);
1665             fjy3             = _mm_add_pd(fjy3,ty);
1666             fjz3             = _mm_add_pd(fjz3,tz);
1667
1668             }
1669
1670             /**************************
1671              * CALCULATE INTERACTIONS *
1672              **************************/
1673
1674             if (gmx_mm_any_lt(rsq31,rcutoff2))
1675             {
1676
1677             /* REACTION-FIELD ELECTROSTATICS */
1678             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
1679
1680             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
1681
1682             fscal            = felec;
1683
1684             fscal            = _mm_and_pd(fscal,cutoff_mask);
1685
1686             /* Calculate temporary vectorial force */
1687             tx               = _mm_mul_pd(fscal,dx31);
1688             ty               = _mm_mul_pd(fscal,dy31);
1689             tz               = _mm_mul_pd(fscal,dz31);
1690
1691             /* Update vectorial force */
1692             fix3             = _mm_add_pd(fix3,tx);
1693             fiy3             = _mm_add_pd(fiy3,ty);
1694             fiz3             = _mm_add_pd(fiz3,tz);
1695
1696             fjx1             = _mm_add_pd(fjx1,tx);
1697             fjy1             = _mm_add_pd(fjy1,ty);
1698             fjz1             = _mm_add_pd(fjz1,tz);
1699
1700             }
1701
1702             /**************************
1703              * CALCULATE INTERACTIONS *
1704              **************************/
1705
1706             if (gmx_mm_any_lt(rsq32,rcutoff2))
1707             {
1708
1709             /* REACTION-FIELD ELECTROSTATICS */
1710             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
1711
1712             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1713
1714             fscal            = felec;
1715
1716             fscal            = _mm_and_pd(fscal,cutoff_mask);
1717
1718             /* Calculate temporary vectorial force */
1719             tx               = _mm_mul_pd(fscal,dx32);
1720             ty               = _mm_mul_pd(fscal,dy32);
1721             tz               = _mm_mul_pd(fscal,dz32);
1722
1723             /* Update vectorial force */
1724             fix3             = _mm_add_pd(fix3,tx);
1725             fiy3             = _mm_add_pd(fiy3,ty);
1726             fiz3             = _mm_add_pd(fiz3,tz);
1727
1728             fjx2             = _mm_add_pd(fjx2,tx);
1729             fjy2             = _mm_add_pd(fjy2,ty);
1730             fjz2             = _mm_add_pd(fjz2,tz);
1731
1732             }
1733
1734             /**************************
1735              * CALCULATE INTERACTIONS *
1736              **************************/
1737
1738             if (gmx_mm_any_lt(rsq33,rcutoff2))
1739             {
1740
1741             /* REACTION-FIELD ELECTROSTATICS */
1742             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
1743
1744             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1745
1746             fscal            = felec;
1747
1748             fscal            = _mm_and_pd(fscal,cutoff_mask);
1749
1750             /* Calculate temporary vectorial force */
1751             tx               = _mm_mul_pd(fscal,dx33);
1752             ty               = _mm_mul_pd(fscal,dy33);
1753             tz               = _mm_mul_pd(fscal,dz33);
1754
1755             /* Update vectorial force */
1756             fix3             = _mm_add_pd(fix3,tx);
1757             fiy3             = _mm_add_pd(fiy3,ty);
1758             fiz3             = _mm_add_pd(fiz3,tz);
1759
1760             fjx3             = _mm_add_pd(fjx3,tx);
1761             fjy3             = _mm_add_pd(fjy3,ty);
1762             fjz3             = _mm_add_pd(fjz3,tz);
1763
1764             }
1765
1766             gmx_mm_decrement_4rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1767
1768             /* Inner loop uses 303 flops */
1769         }
1770
1771         if(jidx<j_index_end)
1772         {
1773
1774             jnrA             = jjnr[jidx];
1775             j_coord_offsetA  = DIM*jnrA;
1776
1777             /* load j atom coordinates */
1778             gmx_mm_load_4rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
1779                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1780                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1781
1782             /* Calculate displacement vector */
1783             dx00             = _mm_sub_pd(ix0,jx0);
1784             dy00             = _mm_sub_pd(iy0,jy0);
1785             dz00             = _mm_sub_pd(iz0,jz0);
1786             dx11             = _mm_sub_pd(ix1,jx1);
1787             dy11             = _mm_sub_pd(iy1,jy1);
1788             dz11             = _mm_sub_pd(iz1,jz1);
1789             dx12             = _mm_sub_pd(ix1,jx2);
1790             dy12             = _mm_sub_pd(iy1,jy2);
1791             dz12             = _mm_sub_pd(iz1,jz2);
1792             dx13             = _mm_sub_pd(ix1,jx3);
1793             dy13             = _mm_sub_pd(iy1,jy3);
1794             dz13             = _mm_sub_pd(iz1,jz3);
1795             dx21             = _mm_sub_pd(ix2,jx1);
1796             dy21             = _mm_sub_pd(iy2,jy1);
1797             dz21             = _mm_sub_pd(iz2,jz1);
1798             dx22             = _mm_sub_pd(ix2,jx2);
1799             dy22             = _mm_sub_pd(iy2,jy2);
1800             dz22             = _mm_sub_pd(iz2,jz2);
1801             dx23             = _mm_sub_pd(ix2,jx3);
1802             dy23             = _mm_sub_pd(iy2,jy3);
1803             dz23             = _mm_sub_pd(iz2,jz3);
1804             dx31             = _mm_sub_pd(ix3,jx1);
1805             dy31             = _mm_sub_pd(iy3,jy1);
1806             dz31             = _mm_sub_pd(iz3,jz1);
1807             dx32             = _mm_sub_pd(ix3,jx2);
1808             dy32             = _mm_sub_pd(iy3,jy2);
1809             dz32             = _mm_sub_pd(iz3,jz2);
1810             dx33             = _mm_sub_pd(ix3,jx3);
1811             dy33             = _mm_sub_pd(iy3,jy3);
1812             dz33             = _mm_sub_pd(iz3,jz3);
1813
1814             /* Calculate squared distance and things based on it */
1815             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1816             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1817             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1818             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
1819             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1820             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1821             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
1822             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
1823             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
1824             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
1825
1826             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1827             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1828             rinv13           = gmx_mm_invsqrt_pd(rsq13);
1829             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1830             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1831             rinv23           = gmx_mm_invsqrt_pd(rsq23);
1832             rinv31           = gmx_mm_invsqrt_pd(rsq31);
1833             rinv32           = gmx_mm_invsqrt_pd(rsq32);
1834             rinv33           = gmx_mm_invsqrt_pd(rsq33);
1835
1836             rinvsq00         = gmx_mm_inv_pd(rsq00);
1837             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1838             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1839             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
1840             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1841             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1842             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
1843             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
1844             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
1845             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
1846
1847             fjx0             = _mm_setzero_pd();
1848             fjy0             = _mm_setzero_pd();
1849             fjz0             = _mm_setzero_pd();
1850             fjx1             = _mm_setzero_pd();
1851             fjy1             = _mm_setzero_pd();
1852             fjz1             = _mm_setzero_pd();
1853             fjx2             = _mm_setzero_pd();
1854             fjy2             = _mm_setzero_pd();
1855             fjz2             = _mm_setzero_pd();
1856             fjx3             = _mm_setzero_pd();
1857             fjy3             = _mm_setzero_pd();
1858             fjz3             = _mm_setzero_pd();
1859
1860             /**************************
1861              * CALCULATE INTERACTIONS *
1862              **************************/
1863
1864             if (gmx_mm_any_lt(rsq00,rcutoff2))
1865             {
1866
1867             /* LENNARD-JONES DISPERSION/REPULSION */
1868
1869             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1870             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1871
1872             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1873
1874             fscal            = fvdw;
1875
1876             fscal            = _mm_and_pd(fscal,cutoff_mask);
1877
1878             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1879
1880             /* Calculate temporary vectorial force */
1881             tx               = _mm_mul_pd(fscal,dx00);
1882             ty               = _mm_mul_pd(fscal,dy00);
1883             tz               = _mm_mul_pd(fscal,dz00);
1884
1885             /* Update vectorial force */
1886             fix0             = _mm_add_pd(fix0,tx);
1887             fiy0             = _mm_add_pd(fiy0,ty);
1888             fiz0             = _mm_add_pd(fiz0,tz);
1889
1890             fjx0             = _mm_add_pd(fjx0,tx);
1891             fjy0             = _mm_add_pd(fjy0,ty);
1892             fjz0             = _mm_add_pd(fjz0,tz);
1893
1894             }
1895
1896             /**************************
1897              * CALCULATE INTERACTIONS *
1898              **************************/
1899
1900             if (gmx_mm_any_lt(rsq11,rcutoff2))
1901             {
1902
1903             /* REACTION-FIELD ELECTROSTATICS */
1904             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
1905
1906             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1907
1908             fscal            = felec;
1909
1910             fscal            = _mm_and_pd(fscal,cutoff_mask);
1911
1912             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1913
1914             /* Calculate temporary vectorial force */
1915             tx               = _mm_mul_pd(fscal,dx11);
1916             ty               = _mm_mul_pd(fscal,dy11);
1917             tz               = _mm_mul_pd(fscal,dz11);
1918
1919             /* Update vectorial force */
1920             fix1             = _mm_add_pd(fix1,tx);
1921             fiy1             = _mm_add_pd(fiy1,ty);
1922             fiz1             = _mm_add_pd(fiz1,tz);
1923
1924             fjx1             = _mm_add_pd(fjx1,tx);
1925             fjy1             = _mm_add_pd(fjy1,ty);
1926             fjz1             = _mm_add_pd(fjz1,tz);
1927
1928             }
1929
1930             /**************************
1931              * CALCULATE INTERACTIONS *
1932              **************************/
1933
1934             if (gmx_mm_any_lt(rsq12,rcutoff2))
1935             {
1936
1937             /* REACTION-FIELD ELECTROSTATICS */
1938             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
1939
1940             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1941
1942             fscal            = felec;
1943
1944             fscal            = _mm_and_pd(fscal,cutoff_mask);
1945
1946             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1947
1948             /* Calculate temporary vectorial force */
1949             tx               = _mm_mul_pd(fscal,dx12);
1950             ty               = _mm_mul_pd(fscal,dy12);
1951             tz               = _mm_mul_pd(fscal,dz12);
1952
1953             /* Update vectorial force */
1954             fix1             = _mm_add_pd(fix1,tx);
1955             fiy1             = _mm_add_pd(fiy1,ty);
1956             fiz1             = _mm_add_pd(fiz1,tz);
1957
1958             fjx2             = _mm_add_pd(fjx2,tx);
1959             fjy2             = _mm_add_pd(fjy2,ty);
1960             fjz2             = _mm_add_pd(fjz2,tz);
1961
1962             }
1963
1964             /**************************
1965              * CALCULATE INTERACTIONS *
1966              **************************/
1967
1968             if (gmx_mm_any_lt(rsq13,rcutoff2))
1969             {
1970
1971             /* REACTION-FIELD ELECTROSTATICS */
1972             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
1973
1974             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
1975
1976             fscal            = felec;
1977
1978             fscal            = _mm_and_pd(fscal,cutoff_mask);
1979
1980             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1981
1982             /* Calculate temporary vectorial force */
1983             tx               = _mm_mul_pd(fscal,dx13);
1984             ty               = _mm_mul_pd(fscal,dy13);
1985             tz               = _mm_mul_pd(fscal,dz13);
1986
1987             /* Update vectorial force */
1988             fix1             = _mm_add_pd(fix1,tx);
1989             fiy1             = _mm_add_pd(fiy1,ty);
1990             fiz1             = _mm_add_pd(fiz1,tz);
1991
1992             fjx3             = _mm_add_pd(fjx3,tx);
1993             fjy3             = _mm_add_pd(fjy3,ty);
1994             fjz3             = _mm_add_pd(fjz3,tz);
1995
1996             }
1997
1998             /**************************
1999              * CALCULATE INTERACTIONS *
2000              **************************/
2001
2002             if (gmx_mm_any_lt(rsq21,rcutoff2))
2003             {
2004
2005             /* REACTION-FIELD ELECTROSTATICS */
2006             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
2007
2008             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
2009
2010             fscal            = felec;
2011
2012             fscal            = _mm_and_pd(fscal,cutoff_mask);
2013
2014             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2015
2016             /* Calculate temporary vectorial force */
2017             tx               = _mm_mul_pd(fscal,dx21);
2018             ty               = _mm_mul_pd(fscal,dy21);
2019             tz               = _mm_mul_pd(fscal,dz21);
2020
2021             /* Update vectorial force */
2022             fix2             = _mm_add_pd(fix2,tx);
2023             fiy2             = _mm_add_pd(fiy2,ty);
2024             fiz2             = _mm_add_pd(fiz2,tz);
2025
2026             fjx1             = _mm_add_pd(fjx1,tx);
2027             fjy1             = _mm_add_pd(fjy1,ty);
2028             fjz1             = _mm_add_pd(fjz1,tz);
2029
2030             }
2031
2032             /**************************
2033              * CALCULATE INTERACTIONS *
2034              **************************/
2035
2036             if (gmx_mm_any_lt(rsq22,rcutoff2))
2037             {
2038
2039             /* REACTION-FIELD ELECTROSTATICS */
2040             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
2041
2042             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
2043
2044             fscal            = felec;
2045
2046             fscal            = _mm_and_pd(fscal,cutoff_mask);
2047
2048             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2049
2050             /* Calculate temporary vectorial force */
2051             tx               = _mm_mul_pd(fscal,dx22);
2052             ty               = _mm_mul_pd(fscal,dy22);
2053             tz               = _mm_mul_pd(fscal,dz22);
2054
2055             /* Update vectorial force */
2056             fix2             = _mm_add_pd(fix2,tx);
2057             fiy2             = _mm_add_pd(fiy2,ty);
2058             fiz2             = _mm_add_pd(fiz2,tz);
2059
2060             fjx2             = _mm_add_pd(fjx2,tx);
2061             fjy2             = _mm_add_pd(fjy2,ty);
2062             fjz2             = _mm_add_pd(fjz2,tz);
2063
2064             }
2065
2066             /**************************
2067              * CALCULATE INTERACTIONS *
2068              **************************/
2069
2070             if (gmx_mm_any_lt(rsq23,rcutoff2))
2071             {
2072
2073             /* REACTION-FIELD ELECTROSTATICS */
2074             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
2075
2076             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
2077
2078             fscal            = felec;
2079
2080             fscal            = _mm_and_pd(fscal,cutoff_mask);
2081
2082             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2083
2084             /* Calculate temporary vectorial force */
2085             tx               = _mm_mul_pd(fscal,dx23);
2086             ty               = _mm_mul_pd(fscal,dy23);
2087             tz               = _mm_mul_pd(fscal,dz23);
2088
2089             /* Update vectorial force */
2090             fix2             = _mm_add_pd(fix2,tx);
2091             fiy2             = _mm_add_pd(fiy2,ty);
2092             fiz2             = _mm_add_pd(fiz2,tz);
2093
2094             fjx3             = _mm_add_pd(fjx3,tx);
2095             fjy3             = _mm_add_pd(fjy3,ty);
2096             fjz3             = _mm_add_pd(fjz3,tz);
2097
2098             }
2099
2100             /**************************
2101              * CALCULATE INTERACTIONS *
2102              **************************/
2103
2104             if (gmx_mm_any_lt(rsq31,rcutoff2))
2105             {
2106
2107             /* REACTION-FIELD ELECTROSTATICS */
2108             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
2109
2110             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
2111
2112             fscal            = felec;
2113
2114             fscal            = _mm_and_pd(fscal,cutoff_mask);
2115
2116             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2117
2118             /* Calculate temporary vectorial force */
2119             tx               = _mm_mul_pd(fscal,dx31);
2120             ty               = _mm_mul_pd(fscal,dy31);
2121             tz               = _mm_mul_pd(fscal,dz31);
2122
2123             /* Update vectorial force */
2124             fix3             = _mm_add_pd(fix3,tx);
2125             fiy3             = _mm_add_pd(fiy3,ty);
2126             fiz3             = _mm_add_pd(fiz3,tz);
2127
2128             fjx1             = _mm_add_pd(fjx1,tx);
2129             fjy1             = _mm_add_pd(fjy1,ty);
2130             fjz1             = _mm_add_pd(fjz1,tz);
2131
2132             }
2133
2134             /**************************
2135              * CALCULATE INTERACTIONS *
2136              **************************/
2137
2138             if (gmx_mm_any_lt(rsq32,rcutoff2))
2139             {
2140
2141             /* REACTION-FIELD ELECTROSTATICS */
2142             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
2143
2144             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
2145
2146             fscal            = felec;
2147
2148             fscal            = _mm_and_pd(fscal,cutoff_mask);
2149
2150             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2151
2152             /* Calculate temporary vectorial force */
2153             tx               = _mm_mul_pd(fscal,dx32);
2154             ty               = _mm_mul_pd(fscal,dy32);
2155             tz               = _mm_mul_pd(fscal,dz32);
2156
2157             /* Update vectorial force */
2158             fix3             = _mm_add_pd(fix3,tx);
2159             fiy3             = _mm_add_pd(fiy3,ty);
2160             fiz3             = _mm_add_pd(fiz3,tz);
2161
2162             fjx2             = _mm_add_pd(fjx2,tx);
2163             fjy2             = _mm_add_pd(fjy2,ty);
2164             fjz2             = _mm_add_pd(fjz2,tz);
2165
2166             }
2167
2168             /**************************
2169              * CALCULATE INTERACTIONS *
2170              **************************/
2171
2172             if (gmx_mm_any_lt(rsq33,rcutoff2))
2173             {
2174
2175             /* REACTION-FIELD ELECTROSTATICS */
2176             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
2177
2178             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
2179
2180             fscal            = felec;
2181
2182             fscal            = _mm_and_pd(fscal,cutoff_mask);
2183
2184             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2185
2186             /* Calculate temporary vectorial force */
2187             tx               = _mm_mul_pd(fscal,dx33);
2188             ty               = _mm_mul_pd(fscal,dy33);
2189             tz               = _mm_mul_pd(fscal,dz33);
2190
2191             /* Update vectorial force */
2192             fix3             = _mm_add_pd(fix3,tx);
2193             fiy3             = _mm_add_pd(fiy3,ty);
2194             fiz3             = _mm_add_pd(fiz3,tz);
2195
2196             fjx3             = _mm_add_pd(fjx3,tx);
2197             fjy3             = _mm_add_pd(fjy3,ty);
2198             fjz3             = _mm_add_pd(fjz3,tz);
2199
2200             }
2201
2202             gmx_mm_decrement_4rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2203
2204             /* Inner loop uses 303 flops */
2205         }
2206
2207         /* End of innermost loop */
2208
2209         gmx_mm_update_iforce_4atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2210                                               f+i_coord_offset,fshift+i_shift_offset);
2211
2212         /* Increment number of inner iterations */
2213         inneriter                  += j_index_end - j_index_start;
2214
2215         /* Outer loop uses 24 flops */
2216     }
2217
2218     /* Increment number of outer iterations */
2219     outeriter        += nri;
2220
2221     /* Update outer/inner flops */
2222
2223     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);
2224 }