Merge release-5-0 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sse4_1_double / nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sse4_1_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sse4_1_double kernel generator.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
48
49 #include "gromacs/simd/math_x86_sse4_1_double.h"
50 #include "kernelutil_x86_sse4_1_double.h"
51
52 /*
53  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_double
54  * Electrostatics interaction: ReactionField
55  * VdW interaction:            LennardJones
56  * Geometry:                   Water3-Water3
57  * Calculate force/pot:        PotentialAndForce
58  */
59 void
60 nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sse4_1_double
61                     (t_nblist                    * gmx_restrict       nlist,
62                      rvec                        * gmx_restrict          xx,
63                      rvec                        * gmx_restrict          ff,
64                      t_forcerec                  * gmx_restrict          fr,
65                      t_mdatoms                   * gmx_restrict     mdatoms,
66                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
67                      t_nrnb                      * gmx_restrict        nrnb)
68 {
69     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
70      * just 0 for non-waters.
71      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
72      * jnr indices corresponding to data put in the four positions in the SIMD register.
73      */
74     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
75     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76     int              jnrA,jnrB;
77     int              j_coord_offsetA,j_coord_offsetB;
78     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
79     real             rcutoff_scalar;
80     real             *shiftvec,*fshift,*x,*f;
81     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82     int              vdwioffset0;
83     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84     int              vdwioffset1;
85     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86     int              vdwioffset2;
87     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
88     int              vdwjidx0A,vdwjidx0B;
89     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
90     int              vdwjidx1A,vdwjidx1B;
91     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
92     int              vdwjidx2A,vdwjidx2B;
93     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
94     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
95     __m128d          dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
96     __m128d          dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
97     __m128d          dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
98     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100     __m128d          dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
101     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
104     real             *charge;
105     int              nvdwtype;
106     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
107     int              *vdwtype;
108     real             *vdwparam;
109     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
110     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
111     __m128d          dummy_mask,cutoff_mask;
112     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
113     __m128d          one     = _mm_set1_pd(1.0);
114     __m128d          two     = _mm_set1_pd(2.0);
115     x                = xx[0];
116     f                = ff[0];
117
118     nri              = nlist->nri;
119     iinr             = nlist->iinr;
120     jindex           = nlist->jindex;
121     jjnr             = nlist->jjnr;
122     shiftidx         = nlist->shift;
123     gid              = nlist->gid;
124     shiftvec         = fr->shift_vec[0];
125     fshift           = fr->fshift[0];
126     facel            = _mm_set1_pd(fr->epsfac);
127     charge           = mdatoms->chargeA;
128     krf              = _mm_set1_pd(fr->ic->k_rf);
129     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
130     crf              = _mm_set1_pd(fr->ic->c_rf);
131     nvdwtype         = fr->ntype;
132     vdwparam         = fr->nbfp;
133     vdwtype          = mdatoms->typeA;
134
135     /* Setup water-specific parameters */
136     inr              = nlist->iinr[0];
137     iq0              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+0]));
138     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
139     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
140     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
141
142     jq0              = _mm_set1_pd(charge[inr+0]);
143     jq1              = _mm_set1_pd(charge[inr+1]);
144     jq2              = _mm_set1_pd(charge[inr+2]);
145     vdwjidx0A        = 2*vdwtype[inr+0];
146     qq00             = _mm_mul_pd(iq0,jq0);
147     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
148     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
149     qq01             = _mm_mul_pd(iq0,jq1);
150     qq02             = _mm_mul_pd(iq0,jq2);
151     qq10             = _mm_mul_pd(iq1,jq0);
152     qq11             = _mm_mul_pd(iq1,jq1);
153     qq12             = _mm_mul_pd(iq1,jq2);
154     qq20             = _mm_mul_pd(iq2,jq0);
155     qq21             = _mm_mul_pd(iq2,jq1);
156     qq22             = _mm_mul_pd(iq2,jq2);
157
158     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
159     rcutoff_scalar   = fr->rcoulomb;
160     rcutoff          = _mm_set1_pd(rcutoff_scalar);
161     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
162
163     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
164     rvdw             = _mm_set1_pd(fr->rvdw);
165
166     /* Avoid stupid compiler warnings */
167     jnrA = jnrB = 0;
168     j_coord_offsetA = 0;
169     j_coord_offsetB = 0;
170
171     outeriter        = 0;
172     inneriter        = 0;
173
174     /* Start outer loop over neighborlists */
175     for(iidx=0; iidx<nri; iidx++)
176     {
177         /* Load shift vector for this list */
178         i_shift_offset   = DIM*shiftidx[iidx];
179
180         /* Load limits for loop over neighbors */
181         j_index_start    = jindex[iidx];
182         j_index_end      = jindex[iidx+1];
183
184         /* Get outer coordinate index */
185         inr              = iinr[iidx];
186         i_coord_offset   = DIM*inr;
187
188         /* Load i particle coords and add shift vector */
189         gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
190                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
191
192         fix0             = _mm_setzero_pd();
193         fiy0             = _mm_setzero_pd();
194         fiz0             = _mm_setzero_pd();
195         fix1             = _mm_setzero_pd();
196         fiy1             = _mm_setzero_pd();
197         fiz1             = _mm_setzero_pd();
198         fix2             = _mm_setzero_pd();
199         fiy2             = _mm_setzero_pd();
200         fiz2             = _mm_setzero_pd();
201
202         /* Reset potential sums */
203         velecsum         = _mm_setzero_pd();
204         vvdwsum          = _mm_setzero_pd();
205
206         /* Start inner kernel loop */
207         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
208         {
209
210             /* Get j neighbor index, and coordinate index */
211             jnrA             = jjnr[jidx];
212             jnrB             = jjnr[jidx+1];
213             j_coord_offsetA  = DIM*jnrA;
214             j_coord_offsetB  = DIM*jnrB;
215
216             /* load j atom coordinates */
217             gmx_mm_load_3rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
218                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
219
220             /* Calculate displacement vector */
221             dx00             = _mm_sub_pd(ix0,jx0);
222             dy00             = _mm_sub_pd(iy0,jy0);
223             dz00             = _mm_sub_pd(iz0,jz0);
224             dx01             = _mm_sub_pd(ix0,jx1);
225             dy01             = _mm_sub_pd(iy0,jy1);
226             dz01             = _mm_sub_pd(iz0,jz1);
227             dx02             = _mm_sub_pd(ix0,jx2);
228             dy02             = _mm_sub_pd(iy0,jy2);
229             dz02             = _mm_sub_pd(iz0,jz2);
230             dx10             = _mm_sub_pd(ix1,jx0);
231             dy10             = _mm_sub_pd(iy1,jy0);
232             dz10             = _mm_sub_pd(iz1,jz0);
233             dx11             = _mm_sub_pd(ix1,jx1);
234             dy11             = _mm_sub_pd(iy1,jy1);
235             dz11             = _mm_sub_pd(iz1,jz1);
236             dx12             = _mm_sub_pd(ix1,jx2);
237             dy12             = _mm_sub_pd(iy1,jy2);
238             dz12             = _mm_sub_pd(iz1,jz2);
239             dx20             = _mm_sub_pd(ix2,jx0);
240             dy20             = _mm_sub_pd(iy2,jy0);
241             dz20             = _mm_sub_pd(iz2,jz0);
242             dx21             = _mm_sub_pd(ix2,jx1);
243             dy21             = _mm_sub_pd(iy2,jy1);
244             dz21             = _mm_sub_pd(iz2,jz1);
245             dx22             = _mm_sub_pd(ix2,jx2);
246             dy22             = _mm_sub_pd(iy2,jy2);
247             dz22             = _mm_sub_pd(iz2,jz2);
248
249             /* Calculate squared distance and things based on it */
250             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
251             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
252             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
253             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
254             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
255             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
256             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
257             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
258             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
259
260             rinv00           = gmx_mm_invsqrt_pd(rsq00);
261             rinv01           = gmx_mm_invsqrt_pd(rsq01);
262             rinv02           = gmx_mm_invsqrt_pd(rsq02);
263             rinv10           = gmx_mm_invsqrt_pd(rsq10);
264             rinv11           = gmx_mm_invsqrt_pd(rsq11);
265             rinv12           = gmx_mm_invsqrt_pd(rsq12);
266             rinv20           = gmx_mm_invsqrt_pd(rsq20);
267             rinv21           = gmx_mm_invsqrt_pd(rsq21);
268             rinv22           = gmx_mm_invsqrt_pd(rsq22);
269
270             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
271             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
272             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
273             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
274             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
275             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
276             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
277             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
278             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
279
280             fjx0             = _mm_setzero_pd();
281             fjy0             = _mm_setzero_pd();
282             fjz0             = _mm_setzero_pd();
283             fjx1             = _mm_setzero_pd();
284             fjy1             = _mm_setzero_pd();
285             fjz1             = _mm_setzero_pd();
286             fjx2             = _mm_setzero_pd();
287             fjy2             = _mm_setzero_pd();
288             fjz2             = _mm_setzero_pd();
289
290             /**************************
291              * CALCULATE INTERACTIONS *
292              **************************/
293
294             if (gmx_mm_any_lt(rsq00,rcutoff2))
295             {
296
297             /* REACTION-FIELD ELECTROSTATICS */
298             velec            = _mm_mul_pd(qq00,_mm_sub_pd(_mm_add_pd(rinv00,_mm_mul_pd(krf,rsq00)),crf));
299             felec            = _mm_mul_pd(qq00,_mm_sub_pd(_mm_mul_pd(rinv00,rinvsq00),krf2));
300
301             /* LENNARD-JONES DISPERSION/REPULSION */
302
303             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
304             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
305             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
306             vvdw             = _mm_sub_pd(_mm_mul_pd( _mm_sub_pd(vvdw12 , _mm_mul_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6))), one_twelfth) ,
307                                           _mm_mul_pd( _mm_sub_pd(vvdw6,_mm_mul_pd(c6_00,sh_vdw_invrcut6)),one_sixth));
308             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
309
310             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
311
312             /* Update potential sum for this i atom from the interaction with this j atom. */
313             velec            = _mm_and_pd(velec,cutoff_mask);
314             velecsum         = _mm_add_pd(velecsum,velec);
315             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
316             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
317
318             fscal            = _mm_add_pd(felec,fvdw);
319
320             fscal            = _mm_and_pd(fscal,cutoff_mask);
321
322             /* Calculate temporary vectorial force */
323             tx               = _mm_mul_pd(fscal,dx00);
324             ty               = _mm_mul_pd(fscal,dy00);
325             tz               = _mm_mul_pd(fscal,dz00);
326
327             /* Update vectorial force */
328             fix0             = _mm_add_pd(fix0,tx);
329             fiy0             = _mm_add_pd(fiy0,ty);
330             fiz0             = _mm_add_pd(fiz0,tz);
331
332             fjx0             = _mm_add_pd(fjx0,tx);
333             fjy0             = _mm_add_pd(fjy0,ty);
334             fjz0             = _mm_add_pd(fjz0,tz);
335
336             }
337
338             /**************************
339              * CALCULATE INTERACTIONS *
340              **************************/
341
342             if (gmx_mm_any_lt(rsq01,rcutoff2))
343             {
344
345             /* REACTION-FIELD ELECTROSTATICS */
346             velec            = _mm_mul_pd(qq01,_mm_sub_pd(_mm_add_pd(rinv01,_mm_mul_pd(krf,rsq01)),crf));
347             felec            = _mm_mul_pd(qq01,_mm_sub_pd(_mm_mul_pd(rinv01,rinvsq01),krf2));
348
349             cutoff_mask      = _mm_cmplt_pd(rsq01,rcutoff2);
350
351             /* Update potential sum for this i atom from the interaction with this j atom. */
352             velec            = _mm_and_pd(velec,cutoff_mask);
353             velecsum         = _mm_add_pd(velecsum,velec);
354
355             fscal            = felec;
356
357             fscal            = _mm_and_pd(fscal,cutoff_mask);
358
359             /* Calculate temporary vectorial force */
360             tx               = _mm_mul_pd(fscal,dx01);
361             ty               = _mm_mul_pd(fscal,dy01);
362             tz               = _mm_mul_pd(fscal,dz01);
363
364             /* Update vectorial force */
365             fix0             = _mm_add_pd(fix0,tx);
366             fiy0             = _mm_add_pd(fiy0,ty);
367             fiz0             = _mm_add_pd(fiz0,tz);
368
369             fjx1             = _mm_add_pd(fjx1,tx);
370             fjy1             = _mm_add_pd(fjy1,ty);
371             fjz1             = _mm_add_pd(fjz1,tz);
372
373             }
374
375             /**************************
376              * CALCULATE INTERACTIONS *
377              **************************/
378
379             if (gmx_mm_any_lt(rsq02,rcutoff2))
380             {
381
382             /* REACTION-FIELD ELECTROSTATICS */
383             velec            = _mm_mul_pd(qq02,_mm_sub_pd(_mm_add_pd(rinv02,_mm_mul_pd(krf,rsq02)),crf));
384             felec            = _mm_mul_pd(qq02,_mm_sub_pd(_mm_mul_pd(rinv02,rinvsq02),krf2));
385
386             cutoff_mask      = _mm_cmplt_pd(rsq02,rcutoff2);
387
388             /* Update potential sum for this i atom from the interaction with this j atom. */
389             velec            = _mm_and_pd(velec,cutoff_mask);
390             velecsum         = _mm_add_pd(velecsum,velec);
391
392             fscal            = felec;
393
394             fscal            = _mm_and_pd(fscal,cutoff_mask);
395
396             /* Calculate temporary vectorial force */
397             tx               = _mm_mul_pd(fscal,dx02);
398             ty               = _mm_mul_pd(fscal,dy02);
399             tz               = _mm_mul_pd(fscal,dz02);
400
401             /* Update vectorial force */
402             fix0             = _mm_add_pd(fix0,tx);
403             fiy0             = _mm_add_pd(fiy0,ty);
404             fiz0             = _mm_add_pd(fiz0,tz);
405
406             fjx2             = _mm_add_pd(fjx2,tx);
407             fjy2             = _mm_add_pd(fjy2,ty);
408             fjz2             = _mm_add_pd(fjz2,tz);
409
410             }
411
412             /**************************
413              * CALCULATE INTERACTIONS *
414              **************************/
415
416             if (gmx_mm_any_lt(rsq10,rcutoff2))
417             {
418
419             /* REACTION-FIELD ELECTROSTATICS */
420             velec            = _mm_mul_pd(qq10,_mm_sub_pd(_mm_add_pd(rinv10,_mm_mul_pd(krf,rsq10)),crf));
421             felec            = _mm_mul_pd(qq10,_mm_sub_pd(_mm_mul_pd(rinv10,rinvsq10),krf2));
422
423             cutoff_mask      = _mm_cmplt_pd(rsq10,rcutoff2);
424
425             /* Update potential sum for this i atom from the interaction with this j atom. */
426             velec            = _mm_and_pd(velec,cutoff_mask);
427             velecsum         = _mm_add_pd(velecsum,velec);
428
429             fscal            = felec;
430
431             fscal            = _mm_and_pd(fscal,cutoff_mask);
432
433             /* Calculate temporary vectorial force */
434             tx               = _mm_mul_pd(fscal,dx10);
435             ty               = _mm_mul_pd(fscal,dy10);
436             tz               = _mm_mul_pd(fscal,dz10);
437
438             /* Update vectorial force */
439             fix1             = _mm_add_pd(fix1,tx);
440             fiy1             = _mm_add_pd(fiy1,ty);
441             fiz1             = _mm_add_pd(fiz1,tz);
442
443             fjx0             = _mm_add_pd(fjx0,tx);
444             fjy0             = _mm_add_pd(fjy0,ty);
445             fjz0             = _mm_add_pd(fjz0,tz);
446
447             }
448
449             /**************************
450              * CALCULATE INTERACTIONS *
451              **************************/
452
453             if (gmx_mm_any_lt(rsq11,rcutoff2))
454             {
455
456             /* REACTION-FIELD ELECTROSTATICS */
457             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_add_pd(rinv11,_mm_mul_pd(krf,rsq11)),crf));
458             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
459
460             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
461
462             /* Update potential sum for this i atom from the interaction with this j atom. */
463             velec            = _mm_and_pd(velec,cutoff_mask);
464             velecsum         = _mm_add_pd(velecsum,velec);
465
466             fscal            = felec;
467
468             fscal            = _mm_and_pd(fscal,cutoff_mask);
469
470             /* Calculate temporary vectorial force */
471             tx               = _mm_mul_pd(fscal,dx11);
472             ty               = _mm_mul_pd(fscal,dy11);
473             tz               = _mm_mul_pd(fscal,dz11);
474
475             /* Update vectorial force */
476             fix1             = _mm_add_pd(fix1,tx);
477             fiy1             = _mm_add_pd(fiy1,ty);
478             fiz1             = _mm_add_pd(fiz1,tz);
479
480             fjx1             = _mm_add_pd(fjx1,tx);
481             fjy1             = _mm_add_pd(fjy1,ty);
482             fjz1             = _mm_add_pd(fjz1,tz);
483
484             }
485
486             /**************************
487              * CALCULATE INTERACTIONS *
488              **************************/
489
490             if (gmx_mm_any_lt(rsq12,rcutoff2))
491             {
492
493             /* REACTION-FIELD ELECTROSTATICS */
494             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_add_pd(rinv12,_mm_mul_pd(krf,rsq12)),crf));
495             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
496
497             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
498
499             /* Update potential sum for this i atom from the interaction with this j atom. */
500             velec            = _mm_and_pd(velec,cutoff_mask);
501             velecsum         = _mm_add_pd(velecsum,velec);
502
503             fscal            = felec;
504
505             fscal            = _mm_and_pd(fscal,cutoff_mask);
506
507             /* Calculate temporary vectorial force */
508             tx               = _mm_mul_pd(fscal,dx12);
509             ty               = _mm_mul_pd(fscal,dy12);
510             tz               = _mm_mul_pd(fscal,dz12);
511
512             /* Update vectorial force */
513             fix1             = _mm_add_pd(fix1,tx);
514             fiy1             = _mm_add_pd(fiy1,ty);
515             fiz1             = _mm_add_pd(fiz1,tz);
516
517             fjx2             = _mm_add_pd(fjx2,tx);
518             fjy2             = _mm_add_pd(fjy2,ty);
519             fjz2             = _mm_add_pd(fjz2,tz);
520
521             }
522
523             /**************************
524              * CALCULATE INTERACTIONS *
525              **************************/
526
527             if (gmx_mm_any_lt(rsq20,rcutoff2))
528             {
529
530             /* REACTION-FIELD ELECTROSTATICS */
531             velec            = _mm_mul_pd(qq20,_mm_sub_pd(_mm_add_pd(rinv20,_mm_mul_pd(krf,rsq20)),crf));
532             felec            = _mm_mul_pd(qq20,_mm_sub_pd(_mm_mul_pd(rinv20,rinvsq20),krf2));
533
534             cutoff_mask      = _mm_cmplt_pd(rsq20,rcutoff2);
535
536             /* Update potential sum for this i atom from the interaction with this j atom. */
537             velec            = _mm_and_pd(velec,cutoff_mask);
538             velecsum         = _mm_add_pd(velecsum,velec);
539
540             fscal            = felec;
541
542             fscal            = _mm_and_pd(fscal,cutoff_mask);
543
544             /* Calculate temporary vectorial force */
545             tx               = _mm_mul_pd(fscal,dx20);
546             ty               = _mm_mul_pd(fscal,dy20);
547             tz               = _mm_mul_pd(fscal,dz20);
548
549             /* Update vectorial force */
550             fix2             = _mm_add_pd(fix2,tx);
551             fiy2             = _mm_add_pd(fiy2,ty);
552             fiz2             = _mm_add_pd(fiz2,tz);
553
554             fjx0             = _mm_add_pd(fjx0,tx);
555             fjy0             = _mm_add_pd(fjy0,ty);
556             fjz0             = _mm_add_pd(fjz0,tz);
557
558             }
559
560             /**************************
561              * CALCULATE INTERACTIONS *
562              **************************/
563
564             if (gmx_mm_any_lt(rsq21,rcutoff2))
565             {
566
567             /* REACTION-FIELD ELECTROSTATICS */
568             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_add_pd(rinv21,_mm_mul_pd(krf,rsq21)),crf));
569             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
570
571             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
572
573             /* Update potential sum for this i atom from the interaction with this j atom. */
574             velec            = _mm_and_pd(velec,cutoff_mask);
575             velecsum         = _mm_add_pd(velecsum,velec);
576
577             fscal            = felec;
578
579             fscal            = _mm_and_pd(fscal,cutoff_mask);
580
581             /* Calculate temporary vectorial force */
582             tx               = _mm_mul_pd(fscal,dx21);
583             ty               = _mm_mul_pd(fscal,dy21);
584             tz               = _mm_mul_pd(fscal,dz21);
585
586             /* Update vectorial force */
587             fix2             = _mm_add_pd(fix2,tx);
588             fiy2             = _mm_add_pd(fiy2,ty);
589             fiz2             = _mm_add_pd(fiz2,tz);
590
591             fjx1             = _mm_add_pd(fjx1,tx);
592             fjy1             = _mm_add_pd(fjy1,ty);
593             fjz1             = _mm_add_pd(fjz1,tz);
594
595             }
596
597             /**************************
598              * CALCULATE INTERACTIONS *
599              **************************/
600
601             if (gmx_mm_any_lt(rsq22,rcutoff2))
602             {
603
604             /* REACTION-FIELD ELECTROSTATICS */
605             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_add_pd(rinv22,_mm_mul_pd(krf,rsq22)),crf));
606             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
607
608             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
609
610             /* Update potential sum for this i atom from the interaction with this j atom. */
611             velec            = _mm_and_pd(velec,cutoff_mask);
612             velecsum         = _mm_add_pd(velecsum,velec);
613
614             fscal            = felec;
615
616             fscal            = _mm_and_pd(fscal,cutoff_mask);
617
618             /* Calculate temporary vectorial force */
619             tx               = _mm_mul_pd(fscal,dx22);
620             ty               = _mm_mul_pd(fscal,dy22);
621             tz               = _mm_mul_pd(fscal,dz22);
622
623             /* Update vectorial force */
624             fix2             = _mm_add_pd(fix2,tx);
625             fiy2             = _mm_add_pd(fiy2,ty);
626             fiz2             = _mm_add_pd(fiz2,tz);
627
628             fjx2             = _mm_add_pd(fjx2,tx);
629             fjy2             = _mm_add_pd(fjy2,ty);
630             fjz2             = _mm_add_pd(fjz2,tz);
631
632             }
633
634             gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
635
636             /* Inner loop uses 342 flops */
637         }
638
639         if(jidx<j_index_end)
640         {
641
642             jnrA             = jjnr[jidx];
643             j_coord_offsetA  = DIM*jnrA;
644
645             /* load j atom coordinates */
646             gmx_mm_load_3rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
647                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
648
649             /* Calculate displacement vector */
650             dx00             = _mm_sub_pd(ix0,jx0);
651             dy00             = _mm_sub_pd(iy0,jy0);
652             dz00             = _mm_sub_pd(iz0,jz0);
653             dx01             = _mm_sub_pd(ix0,jx1);
654             dy01             = _mm_sub_pd(iy0,jy1);
655             dz01             = _mm_sub_pd(iz0,jz1);
656             dx02             = _mm_sub_pd(ix0,jx2);
657             dy02             = _mm_sub_pd(iy0,jy2);
658             dz02             = _mm_sub_pd(iz0,jz2);
659             dx10             = _mm_sub_pd(ix1,jx0);
660             dy10             = _mm_sub_pd(iy1,jy0);
661             dz10             = _mm_sub_pd(iz1,jz0);
662             dx11             = _mm_sub_pd(ix1,jx1);
663             dy11             = _mm_sub_pd(iy1,jy1);
664             dz11             = _mm_sub_pd(iz1,jz1);
665             dx12             = _mm_sub_pd(ix1,jx2);
666             dy12             = _mm_sub_pd(iy1,jy2);
667             dz12             = _mm_sub_pd(iz1,jz2);
668             dx20             = _mm_sub_pd(ix2,jx0);
669             dy20             = _mm_sub_pd(iy2,jy0);
670             dz20             = _mm_sub_pd(iz2,jz0);
671             dx21             = _mm_sub_pd(ix2,jx1);
672             dy21             = _mm_sub_pd(iy2,jy1);
673             dz21             = _mm_sub_pd(iz2,jz1);
674             dx22             = _mm_sub_pd(ix2,jx2);
675             dy22             = _mm_sub_pd(iy2,jy2);
676             dz22             = _mm_sub_pd(iz2,jz2);
677
678             /* Calculate squared distance and things based on it */
679             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
680             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
681             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
682             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
683             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
684             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
685             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
686             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
687             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
688
689             rinv00           = gmx_mm_invsqrt_pd(rsq00);
690             rinv01           = gmx_mm_invsqrt_pd(rsq01);
691             rinv02           = gmx_mm_invsqrt_pd(rsq02);
692             rinv10           = gmx_mm_invsqrt_pd(rsq10);
693             rinv11           = gmx_mm_invsqrt_pd(rsq11);
694             rinv12           = gmx_mm_invsqrt_pd(rsq12);
695             rinv20           = gmx_mm_invsqrt_pd(rsq20);
696             rinv21           = gmx_mm_invsqrt_pd(rsq21);
697             rinv22           = gmx_mm_invsqrt_pd(rsq22);
698
699             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
700             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
701             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
702             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
703             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
704             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
705             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
706             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
707             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
708
709             fjx0             = _mm_setzero_pd();
710             fjy0             = _mm_setzero_pd();
711             fjz0             = _mm_setzero_pd();
712             fjx1             = _mm_setzero_pd();
713             fjy1             = _mm_setzero_pd();
714             fjz1             = _mm_setzero_pd();
715             fjx2             = _mm_setzero_pd();
716             fjy2             = _mm_setzero_pd();
717             fjz2             = _mm_setzero_pd();
718
719             /**************************
720              * CALCULATE INTERACTIONS *
721              **************************/
722
723             if (gmx_mm_any_lt(rsq00,rcutoff2))
724             {
725
726             /* REACTION-FIELD ELECTROSTATICS */
727             velec            = _mm_mul_pd(qq00,_mm_sub_pd(_mm_add_pd(rinv00,_mm_mul_pd(krf,rsq00)),crf));
728             felec            = _mm_mul_pd(qq00,_mm_sub_pd(_mm_mul_pd(rinv00,rinvsq00),krf2));
729
730             /* LENNARD-JONES DISPERSION/REPULSION */
731
732             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
733             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
734             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
735             vvdw             = _mm_sub_pd(_mm_mul_pd( _mm_sub_pd(vvdw12 , _mm_mul_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6))), one_twelfth) ,
736                                           _mm_mul_pd( _mm_sub_pd(vvdw6,_mm_mul_pd(c6_00,sh_vdw_invrcut6)),one_sixth));
737             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
738
739             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
740
741             /* Update potential sum for this i atom from the interaction with this j atom. */
742             velec            = _mm_and_pd(velec,cutoff_mask);
743             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
744             velecsum         = _mm_add_pd(velecsum,velec);
745             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
746             vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
747             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
748
749             fscal            = _mm_add_pd(felec,fvdw);
750
751             fscal            = _mm_and_pd(fscal,cutoff_mask);
752
753             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
754
755             /* Calculate temporary vectorial force */
756             tx               = _mm_mul_pd(fscal,dx00);
757             ty               = _mm_mul_pd(fscal,dy00);
758             tz               = _mm_mul_pd(fscal,dz00);
759
760             /* Update vectorial force */
761             fix0             = _mm_add_pd(fix0,tx);
762             fiy0             = _mm_add_pd(fiy0,ty);
763             fiz0             = _mm_add_pd(fiz0,tz);
764
765             fjx0             = _mm_add_pd(fjx0,tx);
766             fjy0             = _mm_add_pd(fjy0,ty);
767             fjz0             = _mm_add_pd(fjz0,tz);
768
769             }
770
771             /**************************
772              * CALCULATE INTERACTIONS *
773              **************************/
774
775             if (gmx_mm_any_lt(rsq01,rcutoff2))
776             {
777
778             /* REACTION-FIELD ELECTROSTATICS */
779             velec            = _mm_mul_pd(qq01,_mm_sub_pd(_mm_add_pd(rinv01,_mm_mul_pd(krf,rsq01)),crf));
780             felec            = _mm_mul_pd(qq01,_mm_sub_pd(_mm_mul_pd(rinv01,rinvsq01),krf2));
781
782             cutoff_mask      = _mm_cmplt_pd(rsq01,rcutoff2);
783
784             /* Update potential sum for this i atom from the interaction with this j atom. */
785             velec            = _mm_and_pd(velec,cutoff_mask);
786             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
787             velecsum         = _mm_add_pd(velecsum,velec);
788
789             fscal            = felec;
790
791             fscal            = _mm_and_pd(fscal,cutoff_mask);
792
793             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
794
795             /* Calculate temporary vectorial force */
796             tx               = _mm_mul_pd(fscal,dx01);
797             ty               = _mm_mul_pd(fscal,dy01);
798             tz               = _mm_mul_pd(fscal,dz01);
799
800             /* Update vectorial force */
801             fix0             = _mm_add_pd(fix0,tx);
802             fiy0             = _mm_add_pd(fiy0,ty);
803             fiz0             = _mm_add_pd(fiz0,tz);
804
805             fjx1             = _mm_add_pd(fjx1,tx);
806             fjy1             = _mm_add_pd(fjy1,ty);
807             fjz1             = _mm_add_pd(fjz1,tz);
808
809             }
810
811             /**************************
812              * CALCULATE INTERACTIONS *
813              **************************/
814
815             if (gmx_mm_any_lt(rsq02,rcutoff2))
816             {
817
818             /* REACTION-FIELD ELECTROSTATICS */
819             velec            = _mm_mul_pd(qq02,_mm_sub_pd(_mm_add_pd(rinv02,_mm_mul_pd(krf,rsq02)),crf));
820             felec            = _mm_mul_pd(qq02,_mm_sub_pd(_mm_mul_pd(rinv02,rinvsq02),krf2));
821
822             cutoff_mask      = _mm_cmplt_pd(rsq02,rcutoff2);
823
824             /* Update potential sum for this i atom from the interaction with this j atom. */
825             velec            = _mm_and_pd(velec,cutoff_mask);
826             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
827             velecsum         = _mm_add_pd(velecsum,velec);
828
829             fscal            = felec;
830
831             fscal            = _mm_and_pd(fscal,cutoff_mask);
832
833             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
834
835             /* Calculate temporary vectorial force */
836             tx               = _mm_mul_pd(fscal,dx02);
837             ty               = _mm_mul_pd(fscal,dy02);
838             tz               = _mm_mul_pd(fscal,dz02);
839
840             /* Update vectorial force */
841             fix0             = _mm_add_pd(fix0,tx);
842             fiy0             = _mm_add_pd(fiy0,ty);
843             fiz0             = _mm_add_pd(fiz0,tz);
844
845             fjx2             = _mm_add_pd(fjx2,tx);
846             fjy2             = _mm_add_pd(fjy2,ty);
847             fjz2             = _mm_add_pd(fjz2,tz);
848
849             }
850
851             /**************************
852              * CALCULATE INTERACTIONS *
853              **************************/
854
855             if (gmx_mm_any_lt(rsq10,rcutoff2))
856             {
857
858             /* REACTION-FIELD ELECTROSTATICS */
859             velec            = _mm_mul_pd(qq10,_mm_sub_pd(_mm_add_pd(rinv10,_mm_mul_pd(krf,rsq10)),crf));
860             felec            = _mm_mul_pd(qq10,_mm_sub_pd(_mm_mul_pd(rinv10,rinvsq10),krf2));
861
862             cutoff_mask      = _mm_cmplt_pd(rsq10,rcutoff2);
863
864             /* Update potential sum for this i atom from the interaction with this j atom. */
865             velec            = _mm_and_pd(velec,cutoff_mask);
866             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
867             velecsum         = _mm_add_pd(velecsum,velec);
868
869             fscal            = felec;
870
871             fscal            = _mm_and_pd(fscal,cutoff_mask);
872
873             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
874
875             /* Calculate temporary vectorial force */
876             tx               = _mm_mul_pd(fscal,dx10);
877             ty               = _mm_mul_pd(fscal,dy10);
878             tz               = _mm_mul_pd(fscal,dz10);
879
880             /* Update vectorial force */
881             fix1             = _mm_add_pd(fix1,tx);
882             fiy1             = _mm_add_pd(fiy1,ty);
883             fiz1             = _mm_add_pd(fiz1,tz);
884
885             fjx0             = _mm_add_pd(fjx0,tx);
886             fjy0             = _mm_add_pd(fjy0,ty);
887             fjz0             = _mm_add_pd(fjz0,tz);
888
889             }
890
891             /**************************
892              * CALCULATE INTERACTIONS *
893              **************************/
894
895             if (gmx_mm_any_lt(rsq11,rcutoff2))
896             {
897
898             /* REACTION-FIELD ELECTROSTATICS */
899             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_add_pd(rinv11,_mm_mul_pd(krf,rsq11)),crf));
900             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
901
902             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
903
904             /* Update potential sum for this i atom from the interaction with this j atom. */
905             velec            = _mm_and_pd(velec,cutoff_mask);
906             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
907             velecsum         = _mm_add_pd(velecsum,velec);
908
909             fscal            = felec;
910
911             fscal            = _mm_and_pd(fscal,cutoff_mask);
912
913             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
914
915             /* Calculate temporary vectorial force */
916             tx               = _mm_mul_pd(fscal,dx11);
917             ty               = _mm_mul_pd(fscal,dy11);
918             tz               = _mm_mul_pd(fscal,dz11);
919
920             /* Update vectorial force */
921             fix1             = _mm_add_pd(fix1,tx);
922             fiy1             = _mm_add_pd(fiy1,ty);
923             fiz1             = _mm_add_pd(fiz1,tz);
924
925             fjx1             = _mm_add_pd(fjx1,tx);
926             fjy1             = _mm_add_pd(fjy1,ty);
927             fjz1             = _mm_add_pd(fjz1,tz);
928
929             }
930
931             /**************************
932              * CALCULATE INTERACTIONS *
933              **************************/
934
935             if (gmx_mm_any_lt(rsq12,rcutoff2))
936             {
937
938             /* REACTION-FIELD ELECTROSTATICS */
939             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_add_pd(rinv12,_mm_mul_pd(krf,rsq12)),crf));
940             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
941
942             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
943
944             /* Update potential sum for this i atom from the interaction with this j atom. */
945             velec            = _mm_and_pd(velec,cutoff_mask);
946             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
947             velecsum         = _mm_add_pd(velecsum,velec);
948
949             fscal            = felec;
950
951             fscal            = _mm_and_pd(fscal,cutoff_mask);
952
953             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
954
955             /* Calculate temporary vectorial force */
956             tx               = _mm_mul_pd(fscal,dx12);
957             ty               = _mm_mul_pd(fscal,dy12);
958             tz               = _mm_mul_pd(fscal,dz12);
959
960             /* Update vectorial force */
961             fix1             = _mm_add_pd(fix1,tx);
962             fiy1             = _mm_add_pd(fiy1,ty);
963             fiz1             = _mm_add_pd(fiz1,tz);
964
965             fjx2             = _mm_add_pd(fjx2,tx);
966             fjy2             = _mm_add_pd(fjy2,ty);
967             fjz2             = _mm_add_pd(fjz2,tz);
968
969             }
970
971             /**************************
972              * CALCULATE INTERACTIONS *
973              **************************/
974
975             if (gmx_mm_any_lt(rsq20,rcutoff2))
976             {
977
978             /* REACTION-FIELD ELECTROSTATICS */
979             velec            = _mm_mul_pd(qq20,_mm_sub_pd(_mm_add_pd(rinv20,_mm_mul_pd(krf,rsq20)),crf));
980             felec            = _mm_mul_pd(qq20,_mm_sub_pd(_mm_mul_pd(rinv20,rinvsq20),krf2));
981
982             cutoff_mask      = _mm_cmplt_pd(rsq20,rcutoff2);
983
984             /* Update potential sum for this i atom from the interaction with this j atom. */
985             velec            = _mm_and_pd(velec,cutoff_mask);
986             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
987             velecsum         = _mm_add_pd(velecsum,velec);
988
989             fscal            = felec;
990
991             fscal            = _mm_and_pd(fscal,cutoff_mask);
992
993             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
994
995             /* Calculate temporary vectorial force */
996             tx               = _mm_mul_pd(fscal,dx20);
997             ty               = _mm_mul_pd(fscal,dy20);
998             tz               = _mm_mul_pd(fscal,dz20);
999
1000             /* Update vectorial force */
1001             fix2             = _mm_add_pd(fix2,tx);
1002             fiy2             = _mm_add_pd(fiy2,ty);
1003             fiz2             = _mm_add_pd(fiz2,tz);
1004
1005             fjx0             = _mm_add_pd(fjx0,tx);
1006             fjy0             = _mm_add_pd(fjy0,ty);
1007             fjz0             = _mm_add_pd(fjz0,tz);
1008
1009             }
1010
1011             /**************************
1012              * CALCULATE INTERACTIONS *
1013              **************************/
1014
1015             if (gmx_mm_any_lt(rsq21,rcutoff2))
1016             {
1017
1018             /* REACTION-FIELD ELECTROSTATICS */
1019             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_add_pd(rinv21,_mm_mul_pd(krf,rsq21)),crf));
1020             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
1021
1022             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1023
1024             /* Update potential sum for this i atom from the interaction with this j atom. */
1025             velec            = _mm_and_pd(velec,cutoff_mask);
1026             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1027             velecsum         = _mm_add_pd(velecsum,velec);
1028
1029             fscal            = felec;
1030
1031             fscal            = _mm_and_pd(fscal,cutoff_mask);
1032
1033             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1034
1035             /* Calculate temporary vectorial force */
1036             tx               = _mm_mul_pd(fscal,dx21);
1037             ty               = _mm_mul_pd(fscal,dy21);
1038             tz               = _mm_mul_pd(fscal,dz21);
1039
1040             /* Update vectorial force */
1041             fix2             = _mm_add_pd(fix2,tx);
1042             fiy2             = _mm_add_pd(fiy2,ty);
1043             fiz2             = _mm_add_pd(fiz2,tz);
1044
1045             fjx1             = _mm_add_pd(fjx1,tx);
1046             fjy1             = _mm_add_pd(fjy1,ty);
1047             fjz1             = _mm_add_pd(fjz1,tz);
1048
1049             }
1050
1051             /**************************
1052              * CALCULATE INTERACTIONS *
1053              **************************/
1054
1055             if (gmx_mm_any_lt(rsq22,rcutoff2))
1056             {
1057
1058             /* REACTION-FIELD ELECTROSTATICS */
1059             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_add_pd(rinv22,_mm_mul_pd(krf,rsq22)),crf));
1060             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
1061
1062             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
1063
1064             /* Update potential sum for this i atom from the interaction with this j atom. */
1065             velec            = _mm_and_pd(velec,cutoff_mask);
1066             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1067             velecsum         = _mm_add_pd(velecsum,velec);
1068
1069             fscal            = felec;
1070
1071             fscal            = _mm_and_pd(fscal,cutoff_mask);
1072
1073             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1074
1075             /* Calculate temporary vectorial force */
1076             tx               = _mm_mul_pd(fscal,dx22);
1077             ty               = _mm_mul_pd(fscal,dy22);
1078             tz               = _mm_mul_pd(fscal,dz22);
1079
1080             /* Update vectorial force */
1081             fix2             = _mm_add_pd(fix2,tx);
1082             fiy2             = _mm_add_pd(fiy2,ty);
1083             fiz2             = _mm_add_pd(fiz2,tz);
1084
1085             fjx2             = _mm_add_pd(fjx2,tx);
1086             fjy2             = _mm_add_pd(fjy2,ty);
1087             fjz2             = _mm_add_pd(fjz2,tz);
1088
1089             }
1090
1091             gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1092
1093             /* Inner loop uses 342 flops */
1094         }
1095
1096         /* End of innermost loop */
1097
1098         gmx_mm_update_iforce_3atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1099                                               f+i_coord_offset,fshift+i_shift_offset);
1100
1101         ggid                        = gid[iidx];
1102         /* Update potential energies */
1103         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
1104         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
1105
1106         /* Increment number of inner iterations */
1107         inneriter                  += j_index_end - j_index_start;
1108
1109         /* Outer loop uses 20 flops */
1110     }
1111
1112     /* Increment number of outer iterations */
1113     outeriter        += nri;
1114
1115     /* Update outer/inner flops */
1116
1117     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*342);
1118 }
1119 /*
1120  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_double
1121  * Electrostatics interaction: ReactionField
1122  * VdW interaction:            LennardJones
1123  * Geometry:                   Water3-Water3
1124  * Calculate force/pot:        Force
1125  */
1126 void
1127 nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sse4_1_double
1128                     (t_nblist                    * gmx_restrict       nlist,
1129                      rvec                        * gmx_restrict          xx,
1130                      rvec                        * gmx_restrict          ff,
1131                      t_forcerec                  * gmx_restrict          fr,
1132                      t_mdatoms                   * gmx_restrict     mdatoms,
1133                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1134                      t_nrnb                      * gmx_restrict        nrnb)
1135 {
1136     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1137      * just 0 for non-waters.
1138      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
1139      * jnr indices corresponding to data put in the four positions in the SIMD register.
1140      */
1141     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1142     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1143     int              jnrA,jnrB;
1144     int              j_coord_offsetA,j_coord_offsetB;
1145     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1146     real             rcutoff_scalar;
1147     real             *shiftvec,*fshift,*x,*f;
1148     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1149     int              vdwioffset0;
1150     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1151     int              vdwioffset1;
1152     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1153     int              vdwioffset2;
1154     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1155     int              vdwjidx0A,vdwjidx0B;
1156     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1157     int              vdwjidx1A,vdwjidx1B;
1158     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1159     int              vdwjidx2A,vdwjidx2B;
1160     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1161     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1162     __m128d          dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1163     __m128d          dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1164     __m128d          dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1165     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1166     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1167     __m128d          dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1168     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1169     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1170     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
1171     real             *charge;
1172     int              nvdwtype;
1173     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1174     int              *vdwtype;
1175     real             *vdwparam;
1176     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
1177     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
1178     __m128d          dummy_mask,cutoff_mask;
1179     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
1180     __m128d          one     = _mm_set1_pd(1.0);
1181     __m128d          two     = _mm_set1_pd(2.0);
1182     x                = xx[0];
1183     f                = ff[0];
1184
1185     nri              = nlist->nri;
1186     iinr             = nlist->iinr;
1187     jindex           = nlist->jindex;
1188     jjnr             = nlist->jjnr;
1189     shiftidx         = nlist->shift;
1190     gid              = nlist->gid;
1191     shiftvec         = fr->shift_vec[0];
1192     fshift           = fr->fshift[0];
1193     facel            = _mm_set1_pd(fr->epsfac);
1194     charge           = mdatoms->chargeA;
1195     krf              = _mm_set1_pd(fr->ic->k_rf);
1196     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
1197     crf              = _mm_set1_pd(fr->ic->c_rf);
1198     nvdwtype         = fr->ntype;
1199     vdwparam         = fr->nbfp;
1200     vdwtype          = mdatoms->typeA;
1201
1202     /* Setup water-specific parameters */
1203     inr              = nlist->iinr[0];
1204     iq0              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+0]));
1205     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
1206     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
1207     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1208
1209     jq0              = _mm_set1_pd(charge[inr+0]);
1210     jq1              = _mm_set1_pd(charge[inr+1]);
1211     jq2              = _mm_set1_pd(charge[inr+2]);
1212     vdwjidx0A        = 2*vdwtype[inr+0];
1213     qq00             = _mm_mul_pd(iq0,jq0);
1214     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
1215     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
1216     qq01             = _mm_mul_pd(iq0,jq1);
1217     qq02             = _mm_mul_pd(iq0,jq2);
1218     qq10             = _mm_mul_pd(iq1,jq0);
1219     qq11             = _mm_mul_pd(iq1,jq1);
1220     qq12             = _mm_mul_pd(iq1,jq2);
1221     qq20             = _mm_mul_pd(iq2,jq0);
1222     qq21             = _mm_mul_pd(iq2,jq1);
1223     qq22             = _mm_mul_pd(iq2,jq2);
1224
1225     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1226     rcutoff_scalar   = fr->rcoulomb;
1227     rcutoff          = _mm_set1_pd(rcutoff_scalar);
1228     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
1229
1230     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
1231     rvdw             = _mm_set1_pd(fr->rvdw);
1232
1233     /* Avoid stupid compiler warnings */
1234     jnrA = jnrB = 0;
1235     j_coord_offsetA = 0;
1236     j_coord_offsetB = 0;
1237
1238     outeriter        = 0;
1239     inneriter        = 0;
1240
1241     /* Start outer loop over neighborlists */
1242     for(iidx=0; iidx<nri; iidx++)
1243     {
1244         /* Load shift vector for this list */
1245         i_shift_offset   = DIM*shiftidx[iidx];
1246
1247         /* Load limits for loop over neighbors */
1248         j_index_start    = jindex[iidx];
1249         j_index_end      = jindex[iidx+1];
1250
1251         /* Get outer coordinate index */
1252         inr              = iinr[iidx];
1253         i_coord_offset   = DIM*inr;
1254
1255         /* Load i particle coords and add shift vector */
1256         gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
1257                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1258
1259         fix0             = _mm_setzero_pd();
1260         fiy0             = _mm_setzero_pd();
1261         fiz0             = _mm_setzero_pd();
1262         fix1             = _mm_setzero_pd();
1263         fiy1             = _mm_setzero_pd();
1264         fiz1             = _mm_setzero_pd();
1265         fix2             = _mm_setzero_pd();
1266         fiy2             = _mm_setzero_pd();
1267         fiz2             = _mm_setzero_pd();
1268
1269         /* Start inner kernel loop */
1270         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1271         {
1272
1273             /* Get j neighbor index, and coordinate index */
1274             jnrA             = jjnr[jidx];
1275             jnrB             = jjnr[jidx+1];
1276             j_coord_offsetA  = DIM*jnrA;
1277             j_coord_offsetB  = DIM*jnrB;
1278
1279             /* load j atom coordinates */
1280             gmx_mm_load_3rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
1281                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1282
1283             /* Calculate displacement vector */
1284             dx00             = _mm_sub_pd(ix0,jx0);
1285             dy00             = _mm_sub_pd(iy0,jy0);
1286             dz00             = _mm_sub_pd(iz0,jz0);
1287             dx01             = _mm_sub_pd(ix0,jx1);
1288             dy01             = _mm_sub_pd(iy0,jy1);
1289             dz01             = _mm_sub_pd(iz0,jz1);
1290             dx02             = _mm_sub_pd(ix0,jx2);
1291             dy02             = _mm_sub_pd(iy0,jy2);
1292             dz02             = _mm_sub_pd(iz0,jz2);
1293             dx10             = _mm_sub_pd(ix1,jx0);
1294             dy10             = _mm_sub_pd(iy1,jy0);
1295             dz10             = _mm_sub_pd(iz1,jz0);
1296             dx11             = _mm_sub_pd(ix1,jx1);
1297             dy11             = _mm_sub_pd(iy1,jy1);
1298             dz11             = _mm_sub_pd(iz1,jz1);
1299             dx12             = _mm_sub_pd(ix1,jx2);
1300             dy12             = _mm_sub_pd(iy1,jy2);
1301             dz12             = _mm_sub_pd(iz1,jz2);
1302             dx20             = _mm_sub_pd(ix2,jx0);
1303             dy20             = _mm_sub_pd(iy2,jy0);
1304             dz20             = _mm_sub_pd(iz2,jz0);
1305             dx21             = _mm_sub_pd(ix2,jx1);
1306             dy21             = _mm_sub_pd(iy2,jy1);
1307             dz21             = _mm_sub_pd(iz2,jz1);
1308             dx22             = _mm_sub_pd(ix2,jx2);
1309             dy22             = _mm_sub_pd(iy2,jy2);
1310             dz22             = _mm_sub_pd(iz2,jz2);
1311
1312             /* Calculate squared distance and things based on it */
1313             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1314             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
1315             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
1316             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
1317             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1318             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1319             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
1320             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1321             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1322
1323             rinv00           = gmx_mm_invsqrt_pd(rsq00);
1324             rinv01           = gmx_mm_invsqrt_pd(rsq01);
1325             rinv02           = gmx_mm_invsqrt_pd(rsq02);
1326             rinv10           = gmx_mm_invsqrt_pd(rsq10);
1327             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1328             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1329             rinv20           = gmx_mm_invsqrt_pd(rsq20);
1330             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1331             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1332
1333             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
1334             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
1335             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
1336             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
1337             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1338             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1339             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
1340             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1341             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1342
1343             fjx0             = _mm_setzero_pd();
1344             fjy0             = _mm_setzero_pd();
1345             fjz0             = _mm_setzero_pd();
1346             fjx1             = _mm_setzero_pd();
1347             fjy1             = _mm_setzero_pd();
1348             fjz1             = _mm_setzero_pd();
1349             fjx2             = _mm_setzero_pd();
1350             fjy2             = _mm_setzero_pd();
1351             fjz2             = _mm_setzero_pd();
1352
1353             /**************************
1354              * CALCULATE INTERACTIONS *
1355              **************************/
1356
1357             if (gmx_mm_any_lt(rsq00,rcutoff2))
1358             {
1359
1360             /* REACTION-FIELD ELECTROSTATICS */
1361             felec            = _mm_mul_pd(qq00,_mm_sub_pd(_mm_mul_pd(rinv00,rinvsq00),krf2));
1362
1363             /* LENNARD-JONES DISPERSION/REPULSION */
1364
1365             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1366             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1367
1368             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1369
1370             fscal            = _mm_add_pd(felec,fvdw);
1371
1372             fscal            = _mm_and_pd(fscal,cutoff_mask);
1373
1374             /* Calculate temporary vectorial force */
1375             tx               = _mm_mul_pd(fscal,dx00);
1376             ty               = _mm_mul_pd(fscal,dy00);
1377             tz               = _mm_mul_pd(fscal,dz00);
1378
1379             /* Update vectorial force */
1380             fix0             = _mm_add_pd(fix0,tx);
1381             fiy0             = _mm_add_pd(fiy0,ty);
1382             fiz0             = _mm_add_pd(fiz0,tz);
1383
1384             fjx0             = _mm_add_pd(fjx0,tx);
1385             fjy0             = _mm_add_pd(fjy0,ty);
1386             fjz0             = _mm_add_pd(fjz0,tz);
1387
1388             }
1389
1390             /**************************
1391              * CALCULATE INTERACTIONS *
1392              **************************/
1393
1394             if (gmx_mm_any_lt(rsq01,rcutoff2))
1395             {
1396
1397             /* REACTION-FIELD ELECTROSTATICS */
1398             felec            = _mm_mul_pd(qq01,_mm_sub_pd(_mm_mul_pd(rinv01,rinvsq01),krf2));
1399
1400             cutoff_mask      = _mm_cmplt_pd(rsq01,rcutoff2);
1401
1402             fscal            = felec;
1403
1404             fscal            = _mm_and_pd(fscal,cutoff_mask);
1405
1406             /* Calculate temporary vectorial force */
1407             tx               = _mm_mul_pd(fscal,dx01);
1408             ty               = _mm_mul_pd(fscal,dy01);
1409             tz               = _mm_mul_pd(fscal,dz01);
1410
1411             /* Update vectorial force */
1412             fix0             = _mm_add_pd(fix0,tx);
1413             fiy0             = _mm_add_pd(fiy0,ty);
1414             fiz0             = _mm_add_pd(fiz0,tz);
1415
1416             fjx1             = _mm_add_pd(fjx1,tx);
1417             fjy1             = _mm_add_pd(fjy1,ty);
1418             fjz1             = _mm_add_pd(fjz1,tz);
1419
1420             }
1421
1422             /**************************
1423              * CALCULATE INTERACTIONS *
1424              **************************/
1425
1426             if (gmx_mm_any_lt(rsq02,rcutoff2))
1427             {
1428
1429             /* REACTION-FIELD ELECTROSTATICS */
1430             felec            = _mm_mul_pd(qq02,_mm_sub_pd(_mm_mul_pd(rinv02,rinvsq02),krf2));
1431
1432             cutoff_mask      = _mm_cmplt_pd(rsq02,rcutoff2);
1433
1434             fscal            = felec;
1435
1436             fscal            = _mm_and_pd(fscal,cutoff_mask);
1437
1438             /* Calculate temporary vectorial force */
1439             tx               = _mm_mul_pd(fscal,dx02);
1440             ty               = _mm_mul_pd(fscal,dy02);
1441             tz               = _mm_mul_pd(fscal,dz02);
1442
1443             /* Update vectorial force */
1444             fix0             = _mm_add_pd(fix0,tx);
1445             fiy0             = _mm_add_pd(fiy0,ty);
1446             fiz0             = _mm_add_pd(fiz0,tz);
1447
1448             fjx2             = _mm_add_pd(fjx2,tx);
1449             fjy2             = _mm_add_pd(fjy2,ty);
1450             fjz2             = _mm_add_pd(fjz2,tz);
1451
1452             }
1453
1454             /**************************
1455              * CALCULATE INTERACTIONS *
1456              **************************/
1457
1458             if (gmx_mm_any_lt(rsq10,rcutoff2))
1459             {
1460
1461             /* REACTION-FIELD ELECTROSTATICS */
1462             felec            = _mm_mul_pd(qq10,_mm_sub_pd(_mm_mul_pd(rinv10,rinvsq10),krf2));
1463
1464             cutoff_mask      = _mm_cmplt_pd(rsq10,rcutoff2);
1465
1466             fscal            = felec;
1467
1468             fscal            = _mm_and_pd(fscal,cutoff_mask);
1469
1470             /* Calculate temporary vectorial force */
1471             tx               = _mm_mul_pd(fscal,dx10);
1472             ty               = _mm_mul_pd(fscal,dy10);
1473             tz               = _mm_mul_pd(fscal,dz10);
1474
1475             /* Update vectorial force */
1476             fix1             = _mm_add_pd(fix1,tx);
1477             fiy1             = _mm_add_pd(fiy1,ty);
1478             fiz1             = _mm_add_pd(fiz1,tz);
1479
1480             fjx0             = _mm_add_pd(fjx0,tx);
1481             fjy0             = _mm_add_pd(fjy0,ty);
1482             fjz0             = _mm_add_pd(fjz0,tz);
1483
1484             }
1485
1486             /**************************
1487              * CALCULATE INTERACTIONS *
1488              **************************/
1489
1490             if (gmx_mm_any_lt(rsq11,rcutoff2))
1491             {
1492
1493             /* REACTION-FIELD ELECTROSTATICS */
1494             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
1495
1496             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1497
1498             fscal            = felec;
1499
1500             fscal            = _mm_and_pd(fscal,cutoff_mask);
1501
1502             /* Calculate temporary vectorial force */
1503             tx               = _mm_mul_pd(fscal,dx11);
1504             ty               = _mm_mul_pd(fscal,dy11);
1505             tz               = _mm_mul_pd(fscal,dz11);
1506
1507             /* Update vectorial force */
1508             fix1             = _mm_add_pd(fix1,tx);
1509             fiy1             = _mm_add_pd(fiy1,ty);
1510             fiz1             = _mm_add_pd(fiz1,tz);
1511
1512             fjx1             = _mm_add_pd(fjx1,tx);
1513             fjy1             = _mm_add_pd(fjy1,ty);
1514             fjz1             = _mm_add_pd(fjz1,tz);
1515
1516             }
1517
1518             /**************************
1519              * CALCULATE INTERACTIONS *
1520              **************************/
1521
1522             if (gmx_mm_any_lt(rsq12,rcutoff2))
1523             {
1524
1525             /* REACTION-FIELD ELECTROSTATICS */
1526             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
1527
1528             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1529
1530             fscal            = felec;
1531
1532             fscal            = _mm_and_pd(fscal,cutoff_mask);
1533
1534             /* Calculate temporary vectorial force */
1535             tx               = _mm_mul_pd(fscal,dx12);
1536             ty               = _mm_mul_pd(fscal,dy12);
1537             tz               = _mm_mul_pd(fscal,dz12);
1538
1539             /* Update vectorial force */
1540             fix1             = _mm_add_pd(fix1,tx);
1541             fiy1             = _mm_add_pd(fiy1,ty);
1542             fiz1             = _mm_add_pd(fiz1,tz);
1543
1544             fjx2             = _mm_add_pd(fjx2,tx);
1545             fjy2             = _mm_add_pd(fjy2,ty);
1546             fjz2             = _mm_add_pd(fjz2,tz);
1547
1548             }
1549
1550             /**************************
1551              * CALCULATE INTERACTIONS *
1552              **************************/
1553
1554             if (gmx_mm_any_lt(rsq20,rcutoff2))
1555             {
1556
1557             /* REACTION-FIELD ELECTROSTATICS */
1558             felec            = _mm_mul_pd(qq20,_mm_sub_pd(_mm_mul_pd(rinv20,rinvsq20),krf2));
1559
1560             cutoff_mask      = _mm_cmplt_pd(rsq20,rcutoff2);
1561
1562             fscal            = felec;
1563
1564             fscal            = _mm_and_pd(fscal,cutoff_mask);
1565
1566             /* Calculate temporary vectorial force */
1567             tx               = _mm_mul_pd(fscal,dx20);
1568             ty               = _mm_mul_pd(fscal,dy20);
1569             tz               = _mm_mul_pd(fscal,dz20);
1570
1571             /* Update vectorial force */
1572             fix2             = _mm_add_pd(fix2,tx);
1573             fiy2             = _mm_add_pd(fiy2,ty);
1574             fiz2             = _mm_add_pd(fiz2,tz);
1575
1576             fjx0             = _mm_add_pd(fjx0,tx);
1577             fjy0             = _mm_add_pd(fjy0,ty);
1578             fjz0             = _mm_add_pd(fjz0,tz);
1579
1580             }
1581
1582             /**************************
1583              * CALCULATE INTERACTIONS *
1584              **************************/
1585
1586             if (gmx_mm_any_lt(rsq21,rcutoff2))
1587             {
1588
1589             /* REACTION-FIELD ELECTROSTATICS */
1590             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
1591
1592             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1593
1594             fscal            = felec;
1595
1596             fscal            = _mm_and_pd(fscal,cutoff_mask);
1597
1598             /* Calculate temporary vectorial force */
1599             tx               = _mm_mul_pd(fscal,dx21);
1600             ty               = _mm_mul_pd(fscal,dy21);
1601             tz               = _mm_mul_pd(fscal,dz21);
1602
1603             /* Update vectorial force */
1604             fix2             = _mm_add_pd(fix2,tx);
1605             fiy2             = _mm_add_pd(fiy2,ty);
1606             fiz2             = _mm_add_pd(fiz2,tz);
1607
1608             fjx1             = _mm_add_pd(fjx1,tx);
1609             fjy1             = _mm_add_pd(fjy1,ty);
1610             fjz1             = _mm_add_pd(fjz1,tz);
1611
1612             }
1613
1614             /**************************
1615              * CALCULATE INTERACTIONS *
1616              **************************/
1617
1618             if (gmx_mm_any_lt(rsq22,rcutoff2))
1619             {
1620
1621             /* REACTION-FIELD ELECTROSTATICS */
1622             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
1623
1624             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
1625
1626             fscal            = felec;
1627
1628             fscal            = _mm_and_pd(fscal,cutoff_mask);
1629
1630             /* Calculate temporary vectorial force */
1631             tx               = _mm_mul_pd(fscal,dx22);
1632             ty               = _mm_mul_pd(fscal,dy22);
1633             tz               = _mm_mul_pd(fscal,dz22);
1634
1635             /* Update vectorial force */
1636             fix2             = _mm_add_pd(fix2,tx);
1637             fiy2             = _mm_add_pd(fiy2,ty);
1638             fiz2             = _mm_add_pd(fiz2,tz);
1639
1640             fjx2             = _mm_add_pd(fjx2,tx);
1641             fjy2             = _mm_add_pd(fjy2,ty);
1642             fjz2             = _mm_add_pd(fjz2,tz);
1643
1644             }
1645
1646             gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1647
1648             /* Inner loop uses 277 flops */
1649         }
1650
1651         if(jidx<j_index_end)
1652         {
1653
1654             jnrA             = jjnr[jidx];
1655             j_coord_offsetA  = DIM*jnrA;
1656
1657             /* load j atom coordinates */
1658             gmx_mm_load_3rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
1659                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1660
1661             /* Calculate displacement vector */
1662             dx00             = _mm_sub_pd(ix0,jx0);
1663             dy00             = _mm_sub_pd(iy0,jy0);
1664             dz00             = _mm_sub_pd(iz0,jz0);
1665             dx01             = _mm_sub_pd(ix0,jx1);
1666             dy01             = _mm_sub_pd(iy0,jy1);
1667             dz01             = _mm_sub_pd(iz0,jz1);
1668             dx02             = _mm_sub_pd(ix0,jx2);
1669             dy02             = _mm_sub_pd(iy0,jy2);
1670             dz02             = _mm_sub_pd(iz0,jz2);
1671             dx10             = _mm_sub_pd(ix1,jx0);
1672             dy10             = _mm_sub_pd(iy1,jy0);
1673             dz10             = _mm_sub_pd(iz1,jz0);
1674             dx11             = _mm_sub_pd(ix1,jx1);
1675             dy11             = _mm_sub_pd(iy1,jy1);
1676             dz11             = _mm_sub_pd(iz1,jz1);
1677             dx12             = _mm_sub_pd(ix1,jx2);
1678             dy12             = _mm_sub_pd(iy1,jy2);
1679             dz12             = _mm_sub_pd(iz1,jz2);
1680             dx20             = _mm_sub_pd(ix2,jx0);
1681             dy20             = _mm_sub_pd(iy2,jy0);
1682             dz20             = _mm_sub_pd(iz2,jz0);
1683             dx21             = _mm_sub_pd(ix2,jx1);
1684             dy21             = _mm_sub_pd(iy2,jy1);
1685             dz21             = _mm_sub_pd(iz2,jz1);
1686             dx22             = _mm_sub_pd(ix2,jx2);
1687             dy22             = _mm_sub_pd(iy2,jy2);
1688             dz22             = _mm_sub_pd(iz2,jz2);
1689
1690             /* Calculate squared distance and things based on it */
1691             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1692             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
1693             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
1694             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
1695             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1696             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1697             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
1698             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1699             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1700
1701             rinv00           = gmx_mm_invsqrt_pd(rsq00);
1702             rinv01           = gmx_mm_invsqrt_pd(rsq01);
1703             rinv02           = gmx_mm_invsqrt_pd(rsq02);
1704             rinv10           = gmx_mm_invsqrt_pd(rsq10);
1705             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1706             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1707             rinv20           = gmx_mm_invsqrt_pd(rsq20);
1708             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1709             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1710
1711             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
1712             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
1713             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
1714             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
1715             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1716             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1717             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
1718             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1719             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1720
1721             fjx0             = _mm_setzero_pd();
1722             fjy0             = _mm_setzero_pd();
1723             fjz0             = _mm_setzero_pd();
1724             fjx1             = _mm_setzero_pd();
1725             fjy1             = _mm_setzero_pd();
1726             fjz1             = _mm_setzero_pd();
1727             fjx2             = _mm_setzero_pd();
1728             fjy2             = _mm_setzero_pd();
1729             fjz2             = _mm_setzero_pd();
1730
1731             /**************************
1732              * CALCULATE INTERACTIONS *
1733              **************************/
1734
1735             if (gmx_mm_any_lt(rsq00,rcutoff2))
1736             {
1737
1738             /* REACTION-FIELD ELECTROSTATICS */
1739             felec            = _mm_mul_pd(qq00,_mm_sub_pd(_mm_mul_pd(rinv00,rinvsq00),krf2));
1740
1741             /* LENNARD-JONES DISPERSION/REPULSION */
1742
1743             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1744             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1745
1746             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1747
1748             fscal            = _mm_add_pd(felec,fvdw);
1749
1750             fscal            = _mm_and_pd(fscal,cutoff_mask);
1751
1752             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1753
1754             /* Calculate temporary vectorial force */
1755             tx               = _mm_mul_pd(fscal,dx00);
1756             ty               = _mm_mul_pd(fscal,dy00);
1757             tz               = _mm_mul_pd(fscal,dz00);
1758
1759             /* Update vectorial force */
1760             fix0             = _mm_add_pd(fix0,tx);
1761             fiy0             = _mm_add_pd(fiy0,ty);
1762             fiz0             = _mm_add_pd(fiz0,tz);
1763
1764             fjx0             = _mm_add_pd(fjx0,tx);
1765             fjy0             = _mm_add_pd(fjy0,ty);
1766             fjz0             = _mm_add_pd(fjz0,tz);
1767
1768             }
1769
1770             /**************************
1771              * CALCULATE INTERACTIONS *
1772              **************************/
1773
1774             if (gmx_mm_any_lt(rsq01,rcutoff2))
1775             {
1776
1777             /* REACTION-FIELD ELECTROSTATICS */
1778             felec            = _mm_mul_pd(qq01,_mm_sub_pd(_mm_mul_pd(rinv01,rinvsq01),krf2));
1779
1780             cutoff_mask      = _mm_cmplt_pd(rsq01,rcutoff2);
1781
1782             fscal            = felec;
1783
1784             fscal            = _mm_and_pd(fscal,cutoff_mask);
1785
1786             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1787
1788             /* Calculate temporary vectorial force */
1789             tx               = _mm_mul_pd(fscal,dx01);
1790             ty               = _mm_mul_pd(fscal,dy01);
1791             tz               = _mm_mul_pd(fscal,dz01);
1792
1793             /* Update vectorial force */
1794             fix0             = _mm_add_pd(fix0,tx);
1795             fiy0             = _mm_add_pd(fiy0,ty);
1796             fiz0             = _mm_add_pd(fiz0,tz);
1797
1798             fjx1             = _mm_add_pd(fjx1,tx);
1799             fjy1             = _mm_add_pd(fjy1,ty);
1800             fjz1             = _mm_add_pd(fjz1,tz);
1801
1802             }
1803
1804             /**************************
1805              * CALCULATE INTERACTIONS *
1806              **************************/
1807
1808             if (gmx_mm_any_lt(rsq02,rcutoff2))
1809             {
1810
1811             /* REACTION-FIELD ELECTROSTATICS */
1812             felec            = _mm_mul_pd(qq02,_mm_sub_pd(_mm_mul_pd(rinv02,rinvsq02),krf2));
1813
1814             cutoff_mask      = _mm_cmplt_pd(rsq02,rcutoff2);
1815
1816             fscal            = felec;
1817
1818             fscal            = _mm_and_pd(fscal,cutoff_mask);
1819
1820             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1821
1822             /* Calculate temporary vectorial force */
1823             tx               = _mm_mul_pd(fscal,dx02);
1824             ty               = _mm_mul_pd(fscal,dy02);
1825             tz               = _mm_mul_pd(fscal,dz02);
1826
1827             /* Update vectorial force */
1828             fix0             = _mm_add_pd(fix0,tx);
1829             fiy0             = _mm_add_pd(fiy0,ty);
1830             fiz0             = _mm_add_pd(fiz0,tz);
1831
1832             fjx2             = _mm_add_pd(fjx2,tx);
1833             fjy2             = _mm_add_pd(fjy2,ty);
1834             fjz2             = _mm_add_pd(fjz2,tz);
1835
1836             }
1837
1838             /**************************
1839              * CALCULATE INTERACTIONS *
1840              **************************/
1841
1842             if (gmx_mm_any_lt(rsq10,rcutoff2))
1843             {
1844
1845             /* REACTION-FIELD ELECTROSTATICS */
1846             felec            = _mm_mul_pd(qq10,_mm_sub_pd(_mm_mul_pd(rinv10,rinvsq10),krf2));
1847
1848             cutoff_mask      = _mm_cmplt_pd(rsq10,rcutoff2);
1849
1850             fscal            = felec;
1851
1852             fscal            = _mm_and_pd(fscal,cutoff_mask);
1853
1854             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1855
1856             /* Calculate temporary vectorial force */
1857             tx               = _mm_mul_pd(fscal,dx10);
1858             ty               = _mm_mul_pd(fscal,dy10);
1859             tz               = _mm_mul_pd(fscal,dz10);
1860
1861             /* Update vectorial force */
1862             fix1             = _mm_add_pd(fix1,tx);
1863             fiy1             = _mm_add_pd(fiy1,ty);
1864             fiz1             = _mm_add_pd(fiz1,tz);
1865
1866             fjx0             = _mm_add_pd(fjx0,tx);
1867             fjy0             = _mm_add_pd(fjy0,ty);
1868             fjz0             = _mm_add_pd(fjz0,tz);
1869
1870             }
1871
1872             /**************************
1873              * CALCULATE INTERACTIONS *
1874              **************************/
1875
1876             if (gmx_mm_any_lt(rsq11,rcutoff2))
1877             {
1878
1879             /* REACTION-FIELD ELECTROSTATICS */
1880             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
1881
1882             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1883
1884             fscal            = felec;
1885
1886             fscal            = _mm_and_pd(fscal,cutoff_mask);
1887
1888             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1889
1890             /* Calculate temporary vectorial force */
1891             tx               = _mm_mul_pd(fscal,dx11);
1892             ty               = _mm_mul_pd(fscal,dy11);
1893             tz               = _mm_mul_pd(fscal,dz11);
1894
1895             /* Update vectorial force */
1896             fix1             = _mm_add_pd(fix1,tx);
1897             fiy1             = _mm_add_pd(fiy1,ty);
1898             fiz1             = _mm_add_pd(fiz1,tz);
1899
1900             fjx1             = _mm_add_pd(fjx1,tx);
1901             fjy1             = _mm_add_pd(fjy1,ty);
1902             fjz1             = _mm_add_pd(fjz1,tz);
1903
1904             }
1905
1906             /**************************
1907              * CALCULATE INTERACTIONS *
1908              **************************/
1909
1910             if (gmx_mm_any_lt(rsq12,rcutoff2))
1911             {
1912
1913             /* REACTION-FIELD ELECTROSTATICS */
1914             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
1915
1916             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1917
1918             fscal            = felec;
1919
1920             fscal            = _mm_and_pd(fscal,cutoff_mask);
1921
1922             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1923
1924             /* Calculate temporary vectorial force */
1925             tx               = _mm_mul_pd(fscal,dx12);
1926             ty               = _mm_mul_pd(fscal,dy12);
1927             tz               = _mm_mul_pd(fscal,dz12);
1928
1929             /* Update vectorial force */
1930             fix1             = _mm_add_pd(fix1,tx);
1931             fiy1             = _mm_add_pd(fiy1,ty);
1932             fiz1             = _mm_add_pd(fiz1,tz);
1933
1934             fjx2             = _mm_add_pd(fjx2,tx);
1935             fjy2             = _mm_add_pd(fjy2,ty);
1936             fjz2             = _mm_add_pd(fjz2,tz);
1937
1938             }
1939
1940             /**************************
1941              * CALCULATE INTERACTIONS *
1942              **************************/
1943
1944             if (gmx_mm_any_lt(rsq20,rcutoff2))
1945             {
1946
1947             /* REACTION-FIELD ELECTROSTATICS */
1948             felec            = _mm_mul_pd(qq20,_mm_sub_pd(_mm_mul_pd(rinv20,rinvsq20),krf2));
1949
1950             cutoff_mask      = _mm_cmplt_pd(rsq20,rcutoff2);
1951
1952             fscal            = felec;
1953
1954             fscal            = _mm_and_pd(fscal,cutoff_mask);
1955
1956             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1957
1958             /* Calculate temporary vectorial force */
1959             tx               = _mm_mul_pd(fscal,dx20);
1960             ty               = _mm_mul_pd(fscal,dy20);
1961             tz               = _mm_mul_pd(fscal,dz20);
1962
1963             /* Update vectorial force */
1964             fix2             = _mm_add_pd(fix2,tx);
1965             fiy2             = _mm_add_pd(fiy2,ty);
1966             fiz2             = _mm_add_pd(fiz2,tz);
1967
1968             fjx0             = _mm_add_pd(fjx0,tx);
1969             fjy0             = _mm_add_pd(fjy0,ty);
1970             fjz0             = _mm_add_pd(fjz0,tz);
1971
1972             }
1973
1974             /**************************
1975              * CALCULATE INTERACTIONS *
1976              **************************/
1977
1978             if (gmx_mm_any_lt(rsq21,rcutoff2))
1979             {
1980
1981             /* REACTION-FIELD ELECTROSTATICS */
1982             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
1983
1984             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1985
1986             fscal            = felec;
1987
1988             fscal            = _mm_and_pd(fscal,cutoff_mask);
1989
1990             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1991
1992             /* Calculate temporary vectorial force */
1993             tx               = _mm_mul_pd(fscal,dx21);
1994             ty               = _mm_mul_pd(fscal,dy21);
1995             tz               = _mm_mul_pd(fscal,dz21);
1996
1997             /* Update vectorial force */
1998             fix2             = _mm_add_pd(fix2,tx);
1999             fiy2             = _mm_add_pd(fiy2,ty);
2000             fiz2             = _mm_add_pd(fiz2,tz);
2001
2002             fjx1             = _mm_add_pd(fjx1,tx);
2003             fjy1             = _mm_add_pd(fjy1,ty);
2004             fjz1             = _mm_add_pd(fjz1,tz);
2005
2006             }
2007
2008             /**************************
2009              * CALCULATE INTERACTIONS *
2010              **************************/
2011
2012             if (gmx_mm_any_lt(rsq22,rcutoff2))
2013             {
2014
2015             /* REACTION-FIELD ELECTROSTATICS */
2016             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
2017
2018             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
2019
2020             fscal            = felec;
2021
2022             fscal            = _mm_and_pd(fscal,cutoff_mask);
2023
2024             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2025
2026             /* Calculate temporary vectorial force */
2027             tx               = _mm_mul_pd(fscal,dx22);
2028             ty               = _mm_mul_pd(fscal,dy22);
2029             tz               = _mm_mul_pd(fscal,dz22);
2030
2031             /* Update vectorial force */
2032             fix2             = _mm_add_pd(fix2,tx);
2033             fiy2             = _mm_add_pd(fiy2,ty);
2034             fiz2             = _mm_add_pd(fiz2,tz);
2035
2036             fjx2             = _mm_add_pd(fjx2,tx);
2037             fjy2             = _mm_add_pd(fjy2,ty);
2038             fjz2             = _mm_add_pd(fjz2,tz);
2039
2040             }
2041
2042             gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2043
2044             /* Inner loop uses 277 flops */
2045         }
2046
2047         /* End of innermost loop */
2048
2049         gmx_mm_update_iforce_3atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2050                                               f+i_coord_offset,fshift+i_shift_offset);
2051
2052         /* Increment number of inner iterations */
2053         inneriter                  += j_index_end - j_index_start;
2054
2055         /* Outer loop uses 18 flops */
2056     }
2057
2058     /* Increment number of outer iterations */
2059     outeriter        += nri;
2060
2061     /* Update outer/inner flops */
2062
2063     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*277);
2064 }