Remove all unnecessary HAVE_CONFIG_H
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sse2_double / nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sse2_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sse2_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "gromacs/simd/math_x86_sse2_double.h"
48 #include "kernelutil_x86_sse2_double.h"
49
50 /*
51  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_double
52  * Electrostatics interaction: ReactionField
53  * VdW interaction:            LennardJones
54  * Geometry:                   Water4-Water4
55  * Calculate force/pot:        PotentialAndForce
56  */
57 void
58 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sse2_double
59                     (t_nblist                    * gmx_restrict       nlist,
60                      rvec                        * gmx_restrict          xx,
61                      rvec                        * gmx_restrict          ff,
62                      t_forcerec                  * gmx_restrict          fr,
63                      t_mdatoms                   * gmx_restrict     mdatoms,
64                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
65                      t_nrnb                      * gmx_restrict        nrnb)
66 {
67     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
68      * just 0 for non-waters.
69      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
70      * jnr indices corresponding to data put in the four positions in the SIMD register.
71      */
72     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
73     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74     int              jnrA,jnrB;
75     int              j_coord_offsetA,j_coord_offsetB;
76     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
77     real             rcutoff_scalar;
78     real             *shiftvec,*fshift,*x,*f;
79     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80     int              vdwioffset0;
81     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82     int              vdwioffset1;
83     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84     int              vdwioffset2;
85     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
86     int              vdwioffset3;
87     __m128d          ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
88     int              vdwjidx0A,vdwjidx0B;
89     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
90     int              vdwjidx1A,vdwjidx1B;
91     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
92     int              vdwjidx2A,vdwjidx2B;
93     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
94     int              vdwjidx3A,vdwjidx3B;
95     __m128d          jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
96     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
97     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     __m128d          dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
100     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     __m128d          dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
103     __m128d          dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
104     __m128d          dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
105     __m128d          dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
106     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
107     real             *charge;
108     int              nvdwtype;
109     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
110     int              *vdwtype;
111     real             *vdwparam;
112     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
113     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
114     __m128d          dummy_mask,cutoff_mask;
115     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
116     __m128d          one     = _mm_set1_pd(1.0);
117     __m128d          two     = _mm_set1_pd(2.0);
118     x                = xx[0];
119     f                = ff[0];
120
121     nri              = nlist->nri;
122     iinr             = nlist->iinr;
123     jindex           = nlist->jindex;
124     jjnr             = nlist->jjnr;
125     shiftidx         = nlist->shift;
126     gid              = nlist->gid;
127     shiftvec         = fr->shift_vec[0];
128     fshift           = fr->fshift[0];
129     facel            = _mm_set1_pd(fr->epsfac);
130     charge           = mdatoms->chargeA;
131     krf              = _mm_set1_pd(fr->ic->k_rf);
132     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
133     crf              = _mm_set1_pd(fr->ic->c_rf);
134     nvdwtype         = fr->ntype;
135     vdwparam         = fr->nbfp;
136     vdwtype          = mdatoms->typeA;
137
138     /* Setup water-specific parameters */
139     inr              = nlist->iinr[0];
140     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
141     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
142     iq3              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+3]));
143     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
144
145     jq1              = _mm_set1_pd(charge[inr+1]);
146     jq2              = _mm_set1_pd(charge[inr+2]);
147     jq3              = _mm_set1_pd(charge[inr+3]);
148     vdwjidx0A        = 2*vdwtype[inr+0];
149     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
150     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
151     qq11             = _mm_mul_pd(iq1,jq1);
152     qq12             = _mm_mul_pd(iq1,jq2);
153     qq13             = _mm_mul_pd(iq1,jq3);
154     qq21             = _mm_mul_pd(iq2,jq1);
155     qq22             = _mm_mul_pd(iq2,jq2);
156     qq23             = _mm_mul_pd(iq2,jq3);
157     qq31             = _mm_mul_pd(iq3,jq1);
158     qq32             = _mm_mul_pd(iq3,jq2);
159     qq33             = _mm_mul_pd(iq3,jq3);
160
161     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
162     rcutoff_scalar   = fr->rcoulomb;
163     rcutoff          = _mm_set1_pd(rcutoff_scalar);
164     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
165
166     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
167     rvdw             = _mm_set1_pd(fr->rvdw);
168
169     /* Avoid stupid compiler warnings */
170     jnrA = jnrB = 0;
171     j_coord_offsetA = 0;
172     j_coord_offsetB = 0;
173
174     outeriter        = 0;
175     inneriter        = 0;
176
177     /* Start outer loop over neighborlists */
178     for(iidx=0; iidx<nri; iidx++)
179     {
180         /* Load shift vector for this list */
181         i_shift_offset   = DIM*shiftidx[iidx];
182
183         /* Load limits for loop over neighbors */
184         j_index_start    = jindex[iidx];
185         j_index_end      = jindex[iidx+1];
186
187         /* Get outer coordinate index */
188         inr              = iinr[iidx];
189         i_coord_offset   = DIM*inr;
190
191         /* Load i particle coords and add shift vector */
192         gmx_mm_load_shift_and_4rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
193                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
194
195         fix0             = _mm_setzero_pd();
196         fiy0             = _mm_setzero_pd();
197         fiz0             = _mm_setzero_pd();
198         fix1             = _mm_setzero_pd();
199         fiy1             = _mm_setzero_pd();
200         fiz1             = _mm_setzero_pd();
201         fix2             = _mm_setzero_pd();
202         fiy2             = _mm_setzero_pd();
203         fiz2             = _mm_setzero_pd();
204         fix3             = _mm_setzero_pd();
205         fiy3             = _mm_setzero_pd();
206         fiz3             = _mm_setzero_pd();
207
208         /* Reset potential sums */
209         velecsum         = _mm_setzero_pd();
210         vvdwsum          = _mm_setzero_pd();
211
212         /* Start inner kernel loop */
213         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
214         {
215
216             /* Get j neighbor index, and coordinate index */
217             jnrA             = jjnr[jidx];
218             jnrB             = jjnr[jidx+1];
219             j_coord_offsetA  = DIM*jnrA;
220             j_coord_offsetB  = DIM*jnrB;
221
222             /* load j atom coordinates */
223             gmx_mm_load_4rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
224                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
225                                               &jy2,&jz2,&jx3,&jy3,&jz3);
226
227             /* Calculate displacement vector */
228             dx00             = _mm_sub_pd(ix0,jx0);
229             dy00             = _mm_sub_pd(iy0,jy0);
230             dz00             = _mm_sub_pd(iz0,jz0);
231             dx11             = _mm_sub_pd(ix1,jx1);
232             dy11             = _mm_sub_pd(iy1,jy1);
233             dz11             = _mm_sub_pd(iz1,jz1);
234             dx12             = _mm_sub_pd(ix1,jx2);
235             dy12             = _mm_sub_pd(iy1,jy2);
236             dz12             = _mm_sub_pd(iz1,jz2);
237             dx13             = _mm_sub_pd(ix1,jx3);
238             dy13             = _mm_sub_pd(iy1,jy3);
239             dz13             = _mm_sub_pd(iz1,jz3);
240             dx21             = _mm_sub_pd(ix2,jx1);
241             dy21             = _mm_sub_pd(iy2,jy1);
242             dz21             = _mm_sub_pd(iz2,jz1);
243             dx22             = _mm_sub_pd(ix2,jx2);
244             dy22             = _mm_sub_pd(iy2,jy2);
245             dz22             = _mm_sub_pd(iz2,jz2);
246             dx23             = _mm_sub_pd(ix2,jx3);
247             dy23             = _mm_sub_pd(iy2,jy3);
248             dz23             = _mm_sub_pd(iz2,jz3);
249             dx31             = _mm_sub_pd(ix3,jx1);
250             dy31             = _mm_sub_pd(iy3,jy1);
251             dz31             = _mm_sub_pd(iz3,jz1);
252             dx32             = _mm_sub_pd(ix3,jx2);
253             dy32             = _mm_sub_pd(iy3,jy2);
254             dz32             = _mm_sub_pd(iz3,jz2);
255             dx33             = _mm_sub_pd(ix3,jx3);
256             dy33             = _mm_sub_pd(iy3,jy3);
257             dz33             = _mm_sub_pd(iz3,jz3);
258
259             /* Calculate squared distance and things based on it */
260             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
261             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
262             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
263             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
264             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
265             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
266             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
267             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
268             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
269             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
270
271             rinv11           = gmx_mm_invsqrt_pd(rsq11);
272             rinv12           = gmx_mm_invsqrt_pd(rsq12);
273             rinv13           = gmx_mm_invsqrt_pd(rsq13);
274             rinv21           = gmx_mm_invsqrt_pd(rsq21);
275             rinv22           = gmx_mm_invsqrt_pd(rsq22);
276             rinv23           = gmx_mm_invsqrt_pd(rsq23);
277             rinv31           = gmx_mm_invsqrt_pd(rsq31);
278             rinv32           = gmx_mm_invsqrt_pd(rsq32);
279             rinv33           = gmx_mm_invsqrt_pd(rsq33);
280
281             rinvsq00         = gmx_mm_inv_pd(rsq00);
282             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
283             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
284             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
285             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
286             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
287             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
288             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
289             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
290             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
291
292             fjx0             = _mm_setzero_pd();
293             fjy0             = _mm_setzero_pd();
294             fjz0             = _mm_setzero_pd();
295             fjx1             = _mm_setzero_pd();
296             fjy1             = _mm_setzero_pd();
297             fjz1             = _mm_setzero_pd();
298             fjx2             = _mm_setzero_pd();
299             fjy2             = _mm_setzero_pd();
300             fjz2             = _mm_setzero_pd();
301             fjx3             = _mm_setzero_pd();
302             fjy3             = _mm_setzero_pd();
303             fjz3             = _mm_setzero_pd();
304
305             /**************************
306              * CALCULATE INTERACTIONS *
307              **************************/
308
309             if (gmx_mm_any_lt(rsq00,rcutoff2))
310             {
311
312             /* LENNARD-JONES DISPERSION/REPULSION */
313
314             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
315             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
316             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
317             vvdw             = _mm_sub_pd(_mm_mul_pd( _mm_sub_pd(vvdw12 , _mm_mul_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6))), one_twelfth) ,
318                                           _mm_mul_pd( _mm_sub_pd(vvdw6,_mm_mul_pd(c6_00,sh_vdw_invrcut6)),one_sixth));
319             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
320
321             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
322
323             /* Update potential sum for this i atom from the interaction with this j atom. */
324             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
325             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
326
327             fscal            = fvdw;
328
329             fscal            = _mm_and_pd(fscal,cutoff_mask);
330
331             /* Calculate temporary vectorial force */
332             tx               = _mm_mul_pd(fscal,dx00);
333             ty               = _mm_mul_pd(fscal,dy00);
334             tz               = _mm_mul_pd(fscal,dz00);
335
336             /* Update vectorial force */
337             fix0             = _mm_add_pd(fix0,tx);
338             fiy0             = _mm_add_pd(fiy0,ty);
339             fiz0             = _mm_add_pd(fiz0,tz);
340
341             fjx0             = _mm_add_pd(fjx0,tx);
342             fjy0             = _mm_add_pd(fjy0,ty);
343             fjz0             = _mm_add_pd(fjz0,tz);
344
345             }
346
347             /**************************
348              * CALCULATE INTERACTIONS *
349              **************************/
350
351             if (gmx_mm_any_lt(rsq11,rcutoff2))
352             {
353
354             /* REACTION-FIELD ELECTROSTATICS */
355             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_add_pd(rinv11,_mm_mul_pd(krf,rsq11)),crf));
356             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
357
358             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
359
360             /* Update potential sum for this i atom from the interaction with this j atom. */
361             velec            = _mm_and_pd(velec,cutoff_mask);
362             velecsum         = _mm_add_pd(velecsum,velec);
363
364             fscal            = felec;
365
366             fscal            = _mm_and_pd(fscal,cutoff_mask);
367
368             /* Calculate temporary vectorial force */
369             tx               = _mm_mul_pd(fscal,dx11);
370             ty               = _mm_mul_pd(fscal,dy11);
371             tz               = _mm_mul_pd(fscal,dz11);
372
373             /* Update vectorial force */
374             fix1             = _mm_add_pd(fix1,tx);
375             fiy1             = _mm_add_pd(fiy1,ty);
376             fiz1             = _mm_add_pd(fiz1,tz);
377
378             fjx1             = _mm_add_pd(fjx1,tx);
379             fjy1             = _mm_add_pd(fjy1,ty);
380             fjz1             = _mm_add_pd(fjz1,tz);
381
382             }
383
384             /**************************
385              * CALCULATE INTERACTIONS *
386              **************************/
387
388             if (gmx_mm_any_lt(rsq12,rcutoff2))
389             {
390
391             /* REACTION-FIELD ELECTROSTATICS */
392             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_add_pd(rinv12,_mm_mul_pd(krf,rsq12)),crf));
393             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
394
395             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
396
397             /* Update potential sum for this i atom from the interaction with this j atom. */
398             velec            = _mm_and_pd(velec,cutoff_mask);
399             velecsum         = _mm_add_pd(velecsum,velec);
400
401             fscal            = felec;
402
403             fscal            = _mm_and_pd(fscal,cutoff_mask);
404
405             /* Calculate temporary vectorial force */
406             tx               = _mm_mul_pd(fscal,dx12);
407             ty               = _mm_mul_pd(fscal,dy12);
408             tz               = _mm_mul_pd(fscal,dz12);
409
410             /* Update vectorial force */
411             fix1             = _mm_add_pd(fix1,tx);
412             fiy1             = _mm_add_pd(fiy1,ty);
413             fiz1             = _mm_add_pd(fiz1,tz);
414
415             fjx2             = _mm_add_pd(fjx2,tx);
416             fjy2             = _mm_add_pd(fjy2,ty);
417             fjz2             = _mm_add_pd(fjz2,tz);
418
419             }
420
421             /**************************
422              * CALCULATE INTERACTIONS *
423              **************************/
424
425             if (gmx_mm_any_lt(rsq13,rcutoff2))
426             {
427
428             /* REACTION-FIELD ELECTROSTATICS */
429             velec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_add_pd(rinv13,_mm_mul_pd(krf,rsq13)),crf));
430             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
431
432             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
433
434             /* Update potential sum for this i atom from the interaction with this j atom. */
435             velec            = _mm_and_pd(velec,cutoff_mask);
436             velecsum         = _mm_add_pd(velecsum,velec);
437
438             fscal            = felec;
439
440             fscal            = _mm_and_pd(fscal,cutoff_mask);
441
442             /* Calculate temporary vectorial force */
443             tx               = _mm_mul_pd(fscal,dx13);
444             ty               = _mm_mul_pd(fscal,dy13);
445             tz               = _mm_mul_pd(fscal,dz13);
446
447             /* Update vectorial force */
448             fix1             = _mm_add_pd(fix1,tx);
449             fiy1             = _mm_add_pd(fiy1,ty);
450             fiz1             = _mm_add_pd(fiz1,tz);
451
452             fjx3             = _mm_add_pd(fjx3,tx);
453             fjy3             = _mm_add_pd(fjy3,ty);
454             fjz3             = _mm_add_pd(fjz3,tz);
455
456             }
457
458             /**************************
459              * CALCULATE INTERACTIONS *
460              **************************/
461
462             if (gmx_mm_any_lt(rsq21,rcutoff2))
463             {
464
465             /* REACTION-FIELD ELECTROSTATICS */
466             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_add_pd(rinv21,_mm_mul_pd(krf,rsq21)),crf));
467             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
468
469             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
470
471             /* Update potential sum for this i atom from the interaction with this j atom. */
472             velec            = _mm_and_pd(velec,cutoff_mask);
473             velecsum         = _mm_add_pd(velecsum,velec);
474
475             fscal            = felec;
476
477             fscal            = _mm_and_pd(fscal,cutoff_mask);
478
479             /* Calculate temporary vectorial force */
480             tx               = _mm_mul_pd(fscal,dx21);
481             ty               = _mm_mul_pd(fscal,dy21);
482             tz               = _mm_mul_pd(fscal,dz21);
483
484             /* Update vectorial force */
485             fix2             = _mm_add_pd(fix2,tx);
486             fiy2             = _mm_add_pd(fiy2,ty);
487             fiz2             = _mm_add_pd(fiz2,tz);
488
489             fjx1             = _mm_add_pd(fjx1,tx);
490             fjy1             = _mm_add_pd(fjy1,ty);
491             fjz1             = _mm_add_pd(fjz1,tz);
492
493             }
494
495             /**************************
496              * CALCULATE INTERACTIONS *
497              **************************/
498
499             if (gmx_mm_any_lt(rsq22,rcutoff2))
500             {
501
502             /* REACTION-FIELD ELECTROSTATICS */
503             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_add_pd(rinv22,_mm_mul_pd(krf,rsq22)),crf));
504             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
505
506             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
507
508             /* Update potential sum for this i atom from the interaction with this j atom. */
509             velec            = _mm_and_pd(velec,cutoff_mask);
510             velecsum         = _mm_add_pd(velecsum,velec);
511
512             fscal            = felec;
513
514             fscal            = _mm_and_pd(fscal,cutoff_mask);
515
516             /* Calculate temporary vectorial force */
517             tx               = _mm_mul_pd(fscal,dx22);
518             ty               = _mm_mul_pd(fscal,dy22);
519             tz               = _mm_mul_pd(fscal,dz22);
520
521             /* Update vectorial force */
522             fix2             = _mm_add_pd(fix2,tx);
523             fiy2             = _mm_add_pd(fiy2,ty);
524             fiz2             = _mm_add_pd(fiz2,tz);
525
526             fjx2             = _mm_add_pd(fjx2,tx);
527             fjy2             = _mm_add_pd(fjy2,ty);
528             fjz2             = _mm_add_pd(fjz2,tz);
529
530             }
531
532             /**************************
533              * CALCULATE INTERACTIONS *
534              **************************/
535
536             if (gmx_mm_any_lt(rsq23,rcutoff2))
537             {
538
539             /* REACTION-FIELD ELECTROSTATICS */
540             velec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_add_pd(rinv23,_mm_mul_pd(krf,rsq23)),crf));
541             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
542
543             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
544
545             /* Update potential sum for this i atom from the interaction with this j atom. */
546             velec            = _mm_and_pd(velec,cutoff_mask);
547             velecsum         = _mm_add_pd(velecsum,velec);
548
549             fscal            = felec;
550
551             fscal            = _mm_and_pd(fscal,cutoff_mask);
552
553             /* Calculate temporary vectorial force */
554             tx               = _mm_mul_pd(fscal,dx23);
555             ty               = _mm_mul_pd(fscal,dy23);
556             tz               = _mm_mul_pd(fscal,dz23);
557
558             /* Update vectorial force */
559             fix2             = _mm_add_pd(fix2,tx);
560             fiy2             = _mm_add_pd(fiy2,ty);
561             fiz2             = _mm_add_pd(fiz2,tz);
562
563             fjx3             = _mm_add_pd(fjx3,tx);
564             fjy3             = _mm_add_pd(fjy3,ty);
565             fjz3             = _mm_add_pd(fjz3,tz);
566
567             }
568
569             /**************************
570              * CALCULATE INTERACTIONS *
571              **************************/
572
573             if (gmx_mm_any_lt(rsq31,rcutoff2))
574             {
575
576             /* REACTION-FIELD ELECTROSTATICS */
577             velec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_add_pd(rinv31,_mm_mul_pd(krf,rsq31)),crf));
578             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
579
580             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
581
582             /* Update potential sum for this i atom from the interaction with this j atom. */
583             velec            = _mm_and_pd(velec,cutoff_mask);
584             velecsum         = _mm_add_pd(velecsum,velec);
585
586             fscal            = felec;
587
588             fscal            = _mm_and_pd(fscal,cutoff_mask);
589
590             /* Calculate temporary vectorial force */
591             tx               = _mm_mul_pd(fscal,dx31);
592             ty               = _mm_mul_pd(fscal,dy31);
593             tz               = _mm_mul_pd(fscal,dz31);
594
595             /* Update vectorial force */
596             fix3             = _mm_add_pd(fix3,tx);
597             fiy3             = _mm_add_pd(fiy3,ty);
598             fiz3             = _mm_add_pd(fiz3,tz);
599
600             fjx1             = _mm_add_pd(fjx1,tx);
601             fjy1             = _mm_add_pd(fjy1,ty);
602             fjz1             = _mm_add_pd(fjz1,tz);
603
604             }
605
606             /**************************
607              * CALCULATE INTERACTIONS *
608              **************************/
609
610             if (gmx_mm_any_lt(rsq32,rcutoff2))
611             {
612
613             /* REACTION-FIELD ELECTROSTATICS */
614             velec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_add_pd(rinv32,_mm_mul_pd(krf,rsq32)),crf));
615             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
616
617             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
618
619             /* Update potential sum for this i atom from the interaction with this j atom. */
620             velec            = _mm_and_pd(velec,cutoff_mask);
621             velecsum         = _mm_add_pd(velecsum,velec);
622
623             fscal            = felec;
624
625             fscal            = _mm_and_pd(fscal,cutoff_mask);
626
627             /* Calculate temporary vectorial force */
628             tx               = _mm_mul_pd(fscal,dx32);
629             ty               = _mm_mul_pd(fscal,dy32);
630             tz               = _mm_mul_pd(fscal,dz32);
631
632             /* Update vectorial force */
633             fix3             = _mm_add_pd(fix3,tx);
634             fiy3             = _mm_add_pd(fiy3,ty);
635             fiz3             = _mm_add_pd(fiz3,tz);
636
637             fjx2             = _mm_add_pd(fjx2,tx);
638             fjy2             = _mm_add_pd(fjy2,ty);
639             fjz2             = _mm_add_pd(fjz2,tz);
640
641             }
642
643             /**************************
644              * CALCULATE INTERACTIONS *
645              **************************/
646
647             if (gmx_mm_any_lt(rsq33,rcutoff2))
648             {
649
650             /* REACTION-FIELD ELECTROSTATICS */
651             velec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_add_pd(rinv33,_mm_mul_pd(krf,rsq33)),crf));
652             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
653
654             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
655
656             /* Update potential sum for this i atom from the interaction with this j atom. */
657             velec            = _mm_and_pd(velec,cutoff_mask);
658             velecsum         = _mm_add_pd(velecsum,velec);
659
660             fscal            = felec;
661
662             fscal            = _mm_and_pd(fscal,cutoff_mask);
663
664             /* Calculate temporary vectorial force */
665             tx               = _mm_mul_pd(fscal,dx33);
666             ty               = _mm_mul_pd(fscal,dy33);
667             tz               = _mm_mul_pd(fscal,dz33);
668
669             /* Update vectorial force */
670             fix3             = _mm_add_pd(fix3,tx);
671             fiy3             = _mm_add_pd(fiy3,ty);
672             fiz3             = _mm_add_pd(fiz3,tz);
673
674             fjx3             = _mm_add_pd(fjx3,tx);
675             fjy3             = _mm_add_pd(fjy3,ty);
676             fjz3             = _mm_add_pd(fjz3,tz);
677
678             }
679
680             gmx_mm_decrement_4rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
681
682             /* Inner loop uses 368 flops */
683         }
684
685         if(jidx<j_index_end)
686         {
687
688             jnrA             = jjnr[jidx];
689             j_coord_offsetA  = DIM*jnrA;
690
691             /* load j atom coordinates */
692             gmx_mm_load_4rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
693                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
694                                               &jy2,&jz2,&jx3,&jy3,&jz3);
695
696             /* Calculate displacement vector */
697             dx00             = _mm_sub_pd(ix0,jx0);
698             dy00             = _mm_sub_pd(iy0,jy0);
699             dz00             = _mm_sub_pd(iz0,jz0);
700             dx11             = _mm_sub_pd(ix1,jx1);
701             dy11             = _mm_sub_pd(iy1,jy1);
702             dz11             = _mm_sub_pd(iz1,jz1);
703             dx12             = _mm_sub_pd(ix1,jx2);
704             dy12             = _mm_sub_pd(iy1,jy2);
705             dz12             = _mm_sub_pd(iz1,jz2);
706             dx13             = _mm_sub_pd(ix1,jx3);
707             dy13             = _mm_sub_pd(iy1,jy3);
708             dz13             = _mm_sub_pd(iz1,jz3);
709             dx21             = _mm_sub_pd(ix2,jx1);
710             dy21             = _mm_sub_pd(iy2,jy1);
711             dz21             = _mm_sub_pd(iz2,jz1);
712             dx22             = _mm_sub_pd(ix2,jx2);
713             dy22             = _mm_sub_pd(iy2,jy2);
714             dz22             = _mm_sub_pd(iz2,jz2);
715             dx23             = _mm_sub_pd(ix2,jx3);
716             dy23             = _mm_sub_pd(iy2,jy3);
717             dz23             = _mm_sub_pd(iz2,jz3);
718             dx31             = _mm_sub_pd(ix3,jx1);
719             dy31             = _mm_sub_pd(iy3,jy1);
720             dz31             = _mm_sub_pd(iz3,jz1);
721             dx32             = _mm_sub_pd(ix3,jx2);
722             dy32             = _mm_sub_pd(iy3,jy2);
723             dz32             = _mm_sub_pd(iz3,jz2);
724             dx33             = _mm_sub_pd(ix3,jx3);
725             dy33             = _mm_sub_pd(iy3,jy3);
726             dz33             = _mm_sub_pd(iz3,jz3);
727
728             /* Calculate squared distance and things based on it */
729             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
730             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
731             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
732             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
733             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
734             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
735             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
736             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
737             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
738             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
739
740             rinv11           = gmx_mm_invsqrt_pd(rsq11);
741             rinv12           = gmx_mm_invsqrt_pd(rsq12);
742             rinv13           = gmx_mm_invsqrt_pd(rsq13);
743             rinv21           = gmx_mm_invsqrt_pd(rsq21);
744             rinv22           = gmx_mm_invsqrt_pd(rsq22);
745             rinv23           = gmx_mm_invsqrt_pd(rsq23);
746             rinv31           = gmx_mm_invsqrt_pd(rsq31);
747             rinv32           = gmx_mm_invsqrt_pd(rsq32);
748             rinv33           = gmx_mm_invsqrt_pd(rsq33);
749
750             rinvsq00         = gmx_mm_inv_pd(rsq00);
751             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
752             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
753             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
754             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
755             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
756             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
757             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
758             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
759             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
760
761             fjx0             = _mm_setzero_pd();
762             fjy0             = _mm_setzero_pd();
763             fjz0             = _mm_setzero_pd();
764             fjx1             = _mm_setzero_pd();
765             fjy1             = _mm_setzero_pd();
766             fjz1             = _mm_setzero_pd();
767             fjx2             = _mm_setzero_pd();
768             fjy2             = _mm_setzero_pd();
769             fjz2             = _mm_setzero_pd();
770             fjx3             = _mm_setzero_pd();
771             fjy3             = _mm_setzero_pd();
772             fjz3             = _mm_setzero_pd();
773
774             /**************************
775              * CALCULATE INTERACTIONS *
776              **************************/
777
778             if (gmx_mm_any_lt(rsq00,rcutoff2))
779             {
780
781             /* LENNARD-JONES DISPERSION/REPULSION */
782
783             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
784             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
785             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
786             vvdw             = _mm_sub_pd(_mm_mul_pd( _mm_sub_pd(vvdw12 , _mm_mul_pd(c12_00,_mm_mul_pd(sh_vdw_invrcut6,sh_vdw_invrcut6))), one_twelfth) ,
787                                           _mm_mul_pd( _mm_sub_pd(vvdw6,_mm_mul_pd(c6_00,sh_vdw_invrcut6)),one_sixth));
788             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
789
790             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
791
792             /* Update potential sum for this i atom from the interaction with this j atom. */
793             vvdw             = _mm_and_pd(vvdw,cutoff_mask);
794             vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
795             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
796
797             fscal            = fvdw;
798
799             fscal            = _mm_and_pd(fscal,cutoff_mask);
800
801             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
802
803             /* Calculate temporary vectorial force */
804             tx               = _mm_mul_pd(fscal,dx00);
805             ty               = _mm_mul_pd(fscal,dy00);
806             tz               = _mm_mul_pd(fscal,dz00);
807
808             /* Update vectorial force */
809             fix0             = _mm_add_pd(fix0,tx);
810             fiy0             = _mm_add_pd(fiy0,ty);
811             fiz0             = _mm_add_pd(fiz0,tz);
812
813             fjx0             = _mm_add_pd(fjx0,tx);
814             fjy0             = _mm_add_pd(fjy0,ty);
815             fjz0             = _mm_add_pd(fjz0,tz);
816
817             }
818
819             /**************************
820              * CALCULATE INTERACTIONS *
821              **************************/
822
823             if (gmx_mm_any_lt(rsq11,rcutoff2))
824             {
825
826             /* REACTION-FIELD ELECTROSTATICS */
827             velec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_add_pd(rinv11,_mm_mul_pd(krf,rsq11)),crf));
828             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
829
830             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
831
832             /* Update potential sum for this i atom from the interaction with this j atom. */
833             velec            = _mm_and_pd(velec,cutoff_mask);
834             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
835             velecsum         = _mm_add_pd(velecsum,velec);
836
837             fscal            = felec;
838
839             fscal            = _mm_and_pd(fscal,cutoff_mask);
840
841             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
842
843             /* Calculate temporary vectorial force */
844             tx               = _mm_mul_pd(fscal,dx11);
845             ty               = _mm_mul_pd(fscal,dy11);
846             tz               = _mm_mul_pd(fscal,dz11);
847
848             /* Update vectorial force */
849             fix1             = _mm_add_pd(fix1,tx);
850             fiy1             = _mm_add_pd(fiy1,ty);
851             fiz1             = _mm_add_pd(fiz1,tz);
852
853             fjx1             = _mm_add_pd(fjx1,tx);
854             fjy1             = _mm_add_pd(fjy1,ty);
855             fjz1             = _mm_add_pd(fjz1,tz);
856
857             }
858
859             /**************************
860              * CALCULATE INTERACTIONS *
861              **************************/
862
863             if (gmx_mm_any_lt(rsq12,rcutoff2))
864             {
865
866             /* REACTION-FIELD ELECTROSTATICS */
867             velec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_add_pd(rinv12,_mm_mul_pd(krf,rsq12)),crf));
868             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
869
870             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
871
872             /* Update potential sum for this i atom from the interaction with this j atom. */
873             velec            = _mm_and_pd(velec,cutoff_mask);
874             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
875             velecsum         = _mm_add_pd(velecsum,velec);
876
877             fscal            = felec;
878
879             fscal            = _mm_and_pd(fscal,cutoff_mask);
880
881             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
882
883             /* Calculate temporary vectorial force */
884             tx               = _mm_mul_pd(fscal,dx12);
885             ty               = _mm_mul_pd(fscal,dy12);
886             tz               = _mm_mul_pd(fscal,dz12);
887
888             /* Update vectorial force */
889             fix1             = _mm_add_pd(fix1,tx);
890             fiy1             = _mm_add_pd(fiy1,ty);
891             fiz1             = _mm_add_pd(fiz1,tz);
892
893             fjx2             = _mm_add_pd(fjx2,tx);
894             fjy2             = _mm_add_pd(fjy2,ty);
895             fjz2             = _mm_add_pd(fjz2,tz);
896
897             }
898
899             /**************************
900              * CALCULATE INTERACTIONS *
901              **************************/
902
903             if (gmx_mm_any_lt(rsq13,rcutoff2))
904             {
905
906             /* REACTION-FIELD ELECTROSTATICS */
907             velec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_add_pd(rinv13,_mm_mul_pd(krf,rsq13)),crf));
908             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
909
910             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
911
912             /* Update potential sum for this i atom from the interaction with this j atom. */
913             velec            = _mm_and_pd(velec,cutoff_mask);
914             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
915             velecsum         = _mm_add_pd(velecsum,velec);
916
917             fscal            = felec;
918
919             fscal            = _mm_and_pd(fscal,cutoff_mask);
920
921             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
922
923             /* Calculate temporary vectorial force */
924             tx               = _mm_mul_pd(fscal,dx13);
925             ty               = _mm_mul_pd(fscal,dy13);
926             tz               = _mm_mul_pd(fscal,dz13);
927
928             /* Update vectorial force */
929             fix1             = _mm_add_pd(fix1,tx);
930             fiy1             = _mm_add_pd(fiy1,ty);
931             fiz1             = _mm_add_pd(fiz1,tz);
932
933             fjx3             = _mm_add_pd(fjx3,tx);
934             fjy3             = _mm_add_pd(fjy3,ty);
935             fjz3             = _mm_add_pd(fjz3,tz);
936
937             }
938
939             /**************************
940              * CALCULATE INTERACTIONS *
941              **************************/
942
943             if (gmx_mm_any_lt(rsq21,rcutoff2))
944             {
945
946             /* REACTION-FIELD ELECTROSTATICS */
947             velec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_add_pd(rinv21,_mm_mul_pd(krf,rsq21)),crf));
948             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
949
950             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
951
952             /* Update potential sum for this i atom from the interaction with this j atom. */
953             velec            = _mm_and_pd(velec,cutoff_mask);
954             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
955             velecsum         = _mm_add_pd(velecsum,velec);
956
957             fscal            = felec;
958
959             fscal            = _mm_and_pd(fscal,cutoff_mask);
960
961             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
962
963             /* Calculate temporary vectorial force */
964             tx               = _mm_mul_pd(fscal,dx21);
965             ty               = _mm_mul_pd(fscal,dy21);
966             tz               = _mm_mul_pd(fscal,dz21);
967
968             /* Update vectorial force */
969             fix2             = _mm_add_pd(fix2,tx);
970             fiy2             = _mm_add_pd(fiy2,ty);
971             fiz2             = _mm_add_pd(fiz2,tz);
972
973             fjx1             = _mm_add_pd(fjx1,tx);
974             fjy1             = _mm_add_pd(fjy1,ty);
975             fjz1             = _mm_add_pd(fjz1,tz);
976
977             }
978
979             /**************************
980              * CALCULATE INTERACTIONS *
981              **************************/
982
983             if (gmx_mm_any_lt(rsq22,rcutoff2))
984             {
985
986             /* REACTION-FIELD ELECTROSTATICS */
987             velec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_add_pd(rinv22,_mm_mul_pd(krf,rsq22)),crf));
988             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
989
990             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
991
992             /* Update potential sum for this i atom from the interaction with this j atom. */
993             velec            = _mm_and_pd(velec,cutoff_mask);
994             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
995             velecsum         = _mm_add_pd(velecsum,velec);
996
997             fscal            = felec;
998
999             fscal            = _mm_and_pd(fscal,cutoff_mask);
1000
1001             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1002
1003             /* Calculate temporary vectorial force */
1004             tx               = _mm_mul_pd(fscal,dx22);
1005             ty               = _mm_mul_pd(fscal,dy22);
1006             tz               = _mm_mul_pd(fscal,dz22);
1007
1008             /* Update vectorial force */
1009             fix2             = _mm_add_pd(fix2,tx);
1010             fiy2             = _mm_add_pd(fiy2,ty);
1011             fiz2             = _mm_add_pd(fiz2,tz);
1012
1013             fjx2             = _mm_add_pd(fjx2,tx);
1014             fjy2             = _mm_add_pd(fjy2,ty);
1015             fjz2             = _mm_add_pd(fjz2,tz);
1016
1017             }
1018
1019             /**************************
1020              * CALCULATE INTERACTIONS *
1021              **************************/
1022
1023             if (gmx_mm_any_lt(rsq23,rcutoff2))
1024             {
1025
1026             /* REACTION-FIELD ELECTROSTATICS */
1027             velec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_add_pd(rinv23,_mm_mul_pd(krf,rsq23)),crf));
1028             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
1029
1030             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
1031
1032             /* Update potential sum for this i atom from the interaction with this j atom. */
1033             velec            = _mm_and_pd(velec,cutoff_mask);
1034             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1035             velecsum         = _mm_add_pd(velecsum,velec);
1036
1037             fscal            = felec;
1038
1039             fscal            = _mm_and_pd(fscal,cutoff_mask);
1040
1041             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1042
1043             /* Calculate temporary vectorial force */
1044             tx               = _mm_mul_pd(fscal,dx23);
1045             ty               = _mm_mul_pd(fscal,dy23);
1046             tz               = _mm_mul_pd(fscal,dz23);
1047
1048             /* Update vectorial force */
1049             fix2             = _mm_add_pd(fix2,tx);
1050             fiy2             = _mm_add_pd(fiy2,ty);
1051             fiz2             = _mm_add_pd(fiz2,tz);
1052
1053             fjx3             = _mm_add_pd(fjx3,tx);
1054             fjy3             = _mm_add_pd(fjy3,ty);
1055             fjz3             = _mm_add_pd(fjz3,tz);
1056
1057             }
1058
1059             /**************************
1060              * CALCULATE INTERACTIONS *
1061              **************************/
1062
1063             if (gmx_mm_any_lt(rsq31,rcutoff2))
1064             {
1065
1066             /* REACTION-FIELD ELECTROSTATICS */
1067             velec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_add_pd(rinv31,_mm_mul_pd(krf,rsq31)),crf));
1068             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
1069
1070             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
1071
1072             /* Update potential sum for this i atom from the interaction with this j atom. */
1073             velec            = _mm_and_pd(velec,cutoff_mask);
1074             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1075             velecsum         = _mm_add_pd(velecsum,velec);
1076
1077             fscal            = felec;
1078
1079             fscal            = _mm_and_pd(fscal,cutoff_mask);
1080
1081             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1082
1083             /* Calculate temporary vectorial force */
1084             tx               = _mm_mul_pd(fscal,dx31);
1085             ty               = _mm_mul_pd(fscal,dy31);
1086             tz               = _mm_mul_pd(fscal,dz31);
1087
1088             /* Update vectorial force */
1089             fix3             = _mm_add_pd(fix3,tx);
1090             fiy3             = _mm_add_pd(fiy3,ty);
1091             fiz3             = _mm_add_pd(fiz3,tz);
1092
1093             fjx1             = _mm_add_pd(fjx1,tx);
1094             fjy1             = _mm_add_pd(fjy1,ty);
1095             fjz1             = _mm_add_pd(fjz1,tz);
1096
1097             }
1098
1099             /**************************
1100              * CALCULATE INTERACTIONS *
1101              **************************/
1102
1103             if (gmx_mm_any_lt(rsq32,rcutoff2))
1104             {
1105
1106             /* REACTION-FIELD ELECTROSTATICS */
1107             velec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_add_pd(rinv32,_mm_mul_pd(krf,rsq32)),crf));
1108             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
1109
1110             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1111
1112             /* Update potential sum for this i atom from the interaction with this j atom. */
1113             velec            = _mm_and_pd(velec,cutoff_mask);
1114             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1115             velecsum         = _mm_add_pd(velecsum,velec);
1116
1117             fscal            = felec;
1118
1119             fscal            = _mm_and_pd(fscal,cutoff_mask);
1120
1121             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1122
1123             /* Calculate temporary vectorial force */
1124             tx               = _mm_mul_pd(fscal,dx32);
1125             ty               = _mm_mul_pd(fscal,dy32);
1126             tz               = _mm_mul_pd(fscal,dz32);
1127
1128             /* Update vectorial force */
1129             fix3             = _mm_add_pd(fix3,tx);
1130             fiy3             = _mm_add_pd(fiy3,ty);
1131             fiz3             = _mm_add_pd(fiz3,tz);
1132
1133             fjx2             = _mm_add_pd(fjx2,tx);
1134             fjy2             = _mm_add_pd(fjy2,ty);
1135             fjz2             = _mm_add_pd(fjz2,tz);
1136
1137             }
1138
1139             /**************************
1140              * CALCULATE INTERACTIONS *
1141              **************************/
1142
1143             if (gmx_mm_any_lt(rsq33,rcutoff2))
1144             {
1145
1146             /* REACTION-FIELD ELECTROSTATICS */
1147             velec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_add_pd(rinv33,_mm_mul_pd(krf,rsq33)),crf));
1148             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
1149
1150             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1151
1152             /* Update potential sum for this i atom from the interaction with this j atom. */
1153             velec            = _mm_and_pd(velec,cutoff_mask);
1154             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
1155             velecsum         = _mm_add_pd(velecsum,velec);
1156
1157             fscal            = felec;
1158
1159             fscal            = _mm_and_pd(fscal,cutoff_mask);
1160
1161             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1162
1163             /* Calculate temporary vectorial force */
1164             tx               = _mm_mul_pd(fscal,dx33);
1165             ty               = _mm_mul_pd(fscal,dy33);
1166             tz               = _mm_mul_pd(fscal,dz33);
1167
1168             /* Update vectorial force */
1169             fix3             = _mm_add_pd(fix3,tx);
1170             fiy3             = _mm_add_pd(fiy3,ty);
1171             fiz3             = _mm_add_pd(fiz3,tz);
1172
1173             fjx3             = _mm_add_pd(fjx3,tx);
1174             fjy3             = _mm_add_pd(fjy3,ty);
1175             fjz3             = _mm_add_pd(fjz3,tz);
1176
1177             }
1178
1179             gmx_mm_decrement_4rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1180
1181             /* Inner loop uses 368 flops */
1182         }
1183
1184         /* End of innermost loop */
1185
1186         gmx_mm_update_iforce_4atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1187                                               f+i_coord_offset,fshift+i_shift_offset);
1188
1189         ggid                        = gid[iidx];
1190         /* Update potential energies */
1191         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
1192         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
1193
1194         /* Increment number of inner iterations */
1195         inneriter                  += j_index_end - j_index_start;
1196
1197         /* Outer loop uses 26 flops */
1198     }
1199
1200     /* Increment number of outer iterations */
1201     outeriter        += nri;
1202
1203     /* Update outer/inner flops */
1204
1205     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*368);
1206 }
1207 /*
1208  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_double
1209  * Electrostatics interaction: ReactionField
1210  * VdW interaction:            LennardJones
1211  * Geometry:                   Water4-Water4
1212  * Calculate force/pot:        Force
1213  */
1214 void
1215 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sse2_double
1216                     (t_nblist                    * gmx_restrict       nlist,
1217                      rvec                        * gmx_restrict          xx,
1218                      rvec                        * gmx_restrict          ff,
1219                      t_forcerec                  * gmx_restrict          fr,
1220                      t_mdatoms                   * gmx_restrict     mdatoms,
1221                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1222                      t_nrnb                      * gmx_restrict        nrnb)
1223 {
1224     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1225      * just 0 for non-waters.
1226      * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
1227      * jnr indices corresponding to data put in the four positions in the SIMD register.
1228      */
1229     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1230     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1231     int              jnrA,jnrB;
1232     int              j_coord_offsetA,j_coord_offsetB;
1233     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1234     real             rcutoff_scalar;
1235     real             *shiftvec,*fshift,*x,*f;
1236     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1237     int              vdwioffset0;
1238     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1239     int              vdwioffset1;
1240     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1241     int              vdwioffset2;
1242     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1243     int              vdwioffset3;
1244     __m128d          ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1245     int              vdwjidx0A,vdwjidx0B;
1246     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1247     int              vdwjidx1A,vdwjidx1B;
1248     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1249     int              vdwjidx2A,vdwjidx2B;
1250     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1251     int              vdwjidx3A,vdwjidx3B;
1252     __m128d          jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1253     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1254     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1255     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1256     __m128d          dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1257     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1258     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1259     __m128d          dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1260     __m128d          dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1261     __m128d          dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1262     __m128d          dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1263     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
1264     real             *charge;
1265     int              nvdwtype;
1266     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1267     int              *vdwtype;
1268     real             *vdwparam;
1269     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
1270     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
1271     __m128d          dummy_mask,cutoff_mask;
1272     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
1273     __m128d          one     = _mm_set1_pd(1.0);
1274     __m128d          two     = _mm_set1_pd(2.0);
1275     x                = xx[0];
1276     f                = ff[0];
1277
1278     nri              = nlist->nri;
1279     iinr             = nlist->iinr;
1280     jindex           = nlist->jindex;
1281     jjnr             = nlist->jjnr;
1282     shiftidx         = nlist->shift;
1283     gid              = nlist->gid;
1284     shiftvec         = fr->shift_vec[0];
1285     fshift           = fr->fshift[0];
1286     facel            = _mm_set1_pd(fr->epsfac);
1287     charge           = mdatoms->chargeA;
1288     krf              = _mm_set1_pd(fr->ic->k_rf);
1289     krf2             = _mm_set1_pd(fr->ic->k_rf*2.0);
1290     crf              = _mm_set1_pd(fr->ic->c_rf);
1291     nvdwtype         = fr->ntype;
1292     vdwparam         = fr->nbfp;
1293     vdwtype          = mdatoms->typeA;
1294
1295     /* Setup water-specific parameters */
1296     inr              = nlist->iinr[0];
1297     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
1298     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
1299     iq3              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+3]));
1300     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1301
1302     jq1              = _mm_set1_pd(charge[inr+1]);
1303     jq2              = _mm_set1_pd(charge[inr+2]);
1304     jq3              = _mm_set1_pd(charge[inr+3]);
1305     vdwjidx0A        = 2*vdwtype[inr+0];
1306     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
1307     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
1308     qq11             = _mm_mul_pd(iq1,jq1);
1309     qq12             = _mm_mul_pd(iq1,jq2);
1310     qq13             = _mm_mul_pd(iq1,jq3);
1311     qq21             = _mm_mul_pd(iq2,jq1);
1312     qq22             = _mm_mul_pd(iq2,jq2);
1313     qq23             = _mm_mul_pd(iq2,jq3);
1314     qq31             = _mm_mul_pd(iq3,jq1);
1315     qq32             = _mm_mul_pd(iq3,jq2);
1316     qq33             = _mm_mul_pd(iq3,jq3);
1317
1318     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1319     rcutoff_scalar   = fr->rcoulomb;
1320     rcutoff          = _mm_set1_pd(rcutoff_scalar);
1321     rcutoff2         = _mm_mul_pd(rcutoff,rcutoff);
1322
1323     sh_vdw_invrcut6  = _mm_set1_pd(fr->ic->sh_invrc6);
1324     rvdw             = _mm_set1_pd(fr->rvdw);
1325
1326     /* Avoid stupid compiler warnings */
1327     jnrA = jnrB = 0;
1328     j_coord_offsetA = 0;
1329     j_coord_offsetB = 0;
1330
1331     outeriter        = 0;
1332     inneriter        = 0;
1333
1334     /* Start outer loop over neighborlists */
1335     for(iidx=0; iidx<nri; iidx++)
1336     {
1337         /* Load shift vector for this list */
1338         i_shift_offset   = DIM*shiftidx[iidx];
1339
1340         /* Load limits for loop over neighbors */
1341         j_index_start    = jindex[iidx];
1342         j_index_end      = jindex[iidx+1];
1343
1344         /* Get outer coordinate index */
1345         inr              = iinr[iidx];
1346         i_coord_offset   = DIM*inr;
1347
1348         /* Load i particle coords and add shift vector */
1349         gmx_mm_load_shift_and_4rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
1350                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1351
1352         fix0             = _mm_setzero_pd();
1353         fiy0             = _mm_setzero_pd();
1354         fiz0             = _mm_setzero_pd();
1355         fix1             = _mm_setzero_pd();
1356         fiy1             = _mm_setzero_pd();
1357         fiz1             = _mm_setzero_pd();
1358         fix2             = _mm_setzero_pd();
1359         fiy2             = _mm_setzero_pd();
1360         fiz2             = _mm_setzero_pd();
1361         fix3             = _mm_setzero_pd();
1362         fiy3             = _mm_setzero_pd();
1363         fiz3             = _mm_setzero_pd();
1364
1365         /* Start inner kernel loop */
1366         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1367         {
1368
1369             /* Get j neighbor index, and coordinate index */
1370             jnrA             = jjnr[jidx];
1371             jnrB             = jjnr[jidx+1];
1372             j_coord_offsetA  = DIM*jnrA;
1373             j_coord_offsetB  = DIM*jnrB;
1374
1375             /* load j atom coordinates */
1376             gmx_mm_load_4rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
1377                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1378                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1379
1380             /* Calculate displacement vector */
1381             dx00             = _mm_sub_pd(ix0,jx0);
1382             dy00             = _mm_sub_pd(iy0,jy0);
1383             dz00             = _mm_sub_pd(iz0,jz0);
1384             dx11             = _mm_sub_pd(ix1,jx1);
1385             dy11             = _mm_sub_pd(iy1,jy1);
1386             dz11             = _mm_sub_pd(iz1,jz1);
1387             dx12             = _mm_sub_pd(ix1,jx2);
1388             dy12             = _mm_sub_pd(iy1,jy2);
1389             dz12             = _mm_sub_pd(iz1,jz2);
1390             dx13             = _mm_sub_pd(ix1,jx3);
1391             dy13             = _mm_sub_pd(iy1,jy3);
1392             dz13             = _mm_sub_pd(iz1,jz3);
1393             dx21             = _mm_sub_pd(ix2,jx1);
1394             dy21             = _mm_sub_pd(iy2,jy1);
1395             dz21             = _mm_sub_pd(iz2,jz1);
1396             dx22             = _mm_sub_pd(ix2,jx2);
1397             dy22             = _mm_sub_pd(iy2,jy2);
1398             dz22             = _mm_sub_pd(iz2,jz2);
1399             dx23             = _mm_sub_pd(ix2,jx3);
1400             dy23             = _mm_sub_pd(iy2,jy3);
1401             dz23             = _mm_sub_pd(iz2,jz3);
1402             dx31             = _mm_sub_pd(ix3,jx1);
1403             dy31             = _mm_sub_pd(iy3,jy1);
1404             dz31             = _mm_sub_pd(iz3,jz1);
1405             dx32             = _mm_sub_pd(ix3,jx2);
1406             dy32             = _mm_sub_pd(iy3,jy2);
1407             dz32             = _mm_sub_pd(iz3,jz2);
1408             dx33             = _mm_sub_pd(ix3,jx3);
1409             dy33             = _mm_sub_pd(iy3,jy3);
1410             dz33             = _mm_sub_pd(iz3,jz3);
1411
1412             /* Calculate squared distance and things based on it */
1413             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1414             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1415             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1416             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
1417             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1418             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1419             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
1420             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
1421             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
1422             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
1423
1424             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1425             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1426             rinv13           = gmx_mm_invsqrt_pd(rsq13);
1427             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1428             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1429             rinv23           = gmx_mm_invsqrt_pd(rsq23);
1430             rinv31           = gmx_mm_invsqrt_pd(rsq31);
1431             rinv32           = gmx_mm_invsqrt_pd(rsq32);
1432             rinv33           = gmx_mm_invsqrt_pd(rsq33);
1433
1434             rinvsq00         = gmx_mm_inv_pd(rsq00);
1435             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1436             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1437             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
1438             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1439             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1440             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
1441             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
1442             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
1443             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
1444
1445             fjx0             = _mm_setzero_pd();
1446             fjy0             = _mm_setzero_pd();
1447             fjz0             = _mm_setzero_pd();
1448             fjx1             = _mm_setzero_pd();
1449             fjy1             = _mm_setzero_pd();
1450             fjz1             = _mm_setzero_pd();
1451             fjx2             = _mm_setzero_pd();
1452             fjy2             = _mm_setzero_pd();
1453             fjz2             = _mm_setzero_pd();
1454             fjx3             = _mm_setzero_pd();
1455             fjy3             = _mm_setzero_pd();
1456             fjz3             = _mm_setzero_pd();
1457
1458             /**************************
1459              * CALCULATE INTERACTIONS *
1460              **************************/
1461
1462             if (gmx_mm_any_lt(rsq00,rcutoff2))
1463             {
1464
1465             /* LENNARD-JONES DISPERSION/REPULSION */
1466
1467             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1468             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1469
1470             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1471
1472             fscal            = fvdw;
1473
1474             fscal            = _mm_and_pd(fscal,cutoff_mask);
1475
1476             /* Calculate temporary vectorial force */
1477             tx               = _mm_mul_pd(fscal,dx00);
1478             ty               = _mm_mul_pd(fscal,dy00);
1479             tz               = _mm_mul_pd(fscal,dz00);
1480
1481             /* Update vectorial force */
1482             fix0             = _mm_add_pd(fix0,tx);
1483             fiy0             = _mm_add_pd(fiy0,ty);
1484             fiz0             = _mm_add_pd(fiz0,tz);
1485
1486             fjx0             = _mm_add_pd(fjx0,tx);
1487             fjy0             = _mm_add_pd(fjy0,ty);
1488             fjz0             = _mm_add_pd(fjz0,tz);
1489
1490             }
1491
1492             /**************************
1493              * CALCULATE INTERACTIONS *
1494              **************************/
1495
1496             if (gmx_mm_any_lt(rsq11,rcutoff2))
1497             {
1498
1499             /* REACTION-FIELD ELECTROSTATICS */
1500             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
1501
1502             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1503
1504             fscal            = felec;
1505
1506             fscal            = _mm_and_pd(fscal,cutoff_mask);
1507
1508             /* Calculate temporary vectorial force */
1509             tx               = _mm_mul_pd(fscal,dx11);
1510             ty               = _mm_mul_pd(fscal,dy11);
1511             tz               = _mm_mul_pd(fscal,dz11);
1512
1513             /* Update vectorial force */
1514             fix1             = _mm_add_pd(fix1,tx);
1515             fiy1             = _mm_add_pd(fiy1,ty);
1516             fiz1             = _mm_add_pd(fiz1,tz);
1517
1518             fjx1             = _mm_add_pd(fjx1,tx);
1519             fjy1             = _mm_add_pd(fjy1,ty);
1520             fjz1             = _mm_add_pd(fjz1,tz);
1521
1522             }
1523
1524             /**************************
1525              * CALCULATE INTERACTIONS *
1526              **************************/
1527
1528             if (gmx_mm_any_lt(rsq12,rcutoff2))
1529             {
1530
1531             /* REACTION-FIELD ELECTROSTATICS */
1532             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
1533
1534             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1535
1536             fscal            = felec;
1537
1538             fscal            = _mm_and_pd(fscal,cutoff_mask);
1539
1540             /* Calculate temporary vectorial force */
1541             tx               = _mm_mul_pd(fscal,dx12);
1542             ty               = _mm_mul_pd(fscal,dy12);
1543             tz               = _mm_mul_pd(fscal,dz12);
1544
1545             /* Update vectorial force */
1546             fix1             = _mm_add_pd(fix1,tx);
1547             fiy1             = _mm_add_pd(fiy1,ty);
1548             fiz1             = _mm_add_pd(fiz1,tz);
1549
1550             fjx2             = _mm_add_pd(fjx2,tx);
1551             fjy2             = _mm_add_pd(fjy2,ty);
1552             fjz2             = _mm_add_pd(fjz2,tz);
1553
1554             }
1555
1556             /**************************
1557              * CALCULATE INTERACTIONS *
1558              **************************/
1559
1560             if (gmx_mm_any_lt(rsq13,rcutoff2))
1561             {
1562
1563             /* REACTION-FIELD ELECTROSTATICS */
1564             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
1565
1566             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
1567
1568             fscal            = felec;
1569
1570             fscal            = _mm_and_pd(fscal,cutoff_mask);
1571
1572             /* Calculate temporary vectorial force */
1573             tx               = _mm_mul_pd(fscal,dx13);
1574             ty               = _mm_mul_pd(fscal,dy13);
1575             tz               = _mm_mul_pd(fscal,dz13);
1576
1577             /* Update vectorial force */
1578             fix1             = _mm_add_pd(fix1,tx);
1579             fiy1             = _mm_add_pd(fiy1,ty);
1580             fiz1             = _mm_add_pd(fiz1,tz);
1581
1582             fjx3             = _mm_add_pd(fjx3,tx);
1583             fjy3             = _mm_add_pd(fjy3,ty);
1584             fjz3             = _mm_add_pd(fjz3,tz);
1585
1586             }
1587
1588             /**************************
1589              * CALCULATE INTERACTIONS *
1590              **************************/
1591
1592             if (gmx_mm_any_lt(rsq21,rcutoff2))
1593             {
1594
1595             /* REACTION-FIELD ELECTROSTATICS */
1596             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
1597
1598             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
1599
1600             fscal            = felec;
1601
1602             fscal            = _mm_and_pd(fscal,cutoff_mask);
1603
1604             /* Calculate temporary vectorial force */
1605             tx               = _mm_mul_pd(fscal,dx21);
1606             ty               = _mm_mul_pd(fscal,dy21);
1607             tz               = _mm_mul_pd(fscal,dz21);
1608
1609             /* Update vectorial force */
1610             fix2             = _mm_add_pd(fix2,tx);
1611             fiy2             = _mm_add_pd(fiy2,ty);
1612             fiz2             = _mm_add_pd(fiz2,tz);
1613
1614             fjx1             = _mm_add_pd(fjx1,tx);
1615             fjy1             = _mm_add_pd(fjy1,ty);
1616             fjz1             = _mm_add_pd(fjz1,tz);
1617
1618             }
1619
1620             /**************************
1621              * CALCULATE INTERACTIONS *
1622              **************************/
1623
1624             if (gmx_mm_any_lt(rsq22,rcutoff2))
1625             {
1626
1627             /* REACTION-FIELD ELECTROSTATICS */
1628             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
1629
1630             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
1631
1632             fscal            = felec;
1633
1634             fscal            = _mm_and_pd(fscal,cutoff_mask);
1635
1636             /* Calculate temporary vectorial force */
1637             tx               = _mm_mul_pd(fscal,dx22);
1638             ty               = _mm_mul_pd(fscal,dy22);
1639             tz               = _mm_mul_pd(fscal,dz22);
1640
1641             /* Update vectorial force */
1642             fix2             = _mm_add_pd(fix2,tx);
1643             fiy2             = _mm_add_pd(fiy2,ty);
1644             fiz2             = _mm_add_pd(fiz2,tz);
1645
1646             fjx2             = _mm_add_pd(fjx2,tx);
1647             fjy2             = _mm_add_pd(fjy2,ty);
1648             fjz2             = _mm_add_pd(fjz2,tz);
1649
1650             }
1651
1652             /**************************
1653              * CALCULATE INTERACTIONS *
1654              **************************/
1655
1656             if (gmx_mm_any_lt(rsq23,rcutoff2))
1657             {
1658
1659             /* REACTION-FIELD ELECTROSTATICS */
1660             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
1661
1662             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
1663
1664             fscal            = felec;
1665
1666             fscal            = _mm_and_pd(fscal,cutoff_mask);
1667
1668             /* Calculate temporary vectorial force */
1669             tx               = _mm_mul_pd(fscal,dx23);
1670             ty               = _mm_mul_pd(fscal,dy23);
1671             tz               = _mm_mul_pd(fscal,dz23);
1672
1673             /* Update vectorial force */
1674             fix2             = _mm_add_pd(fix2,tx);
1675             fiy2             = _mm_add_pd(fiy2,ty);
1676             fiz2             = _mm_add_pd(fiz2,tz);
1677
1678             fjx3             = _mm_add_pd(fjx3,tx);
1679             fjy3             = _mm_add_pd(fjy3,ty);
1680             fjz3             = _mm_add_pd(fjz3,tz);
1681
1682             }
1683
1684             /**************************
1685              * CALCULATE INTERACTIONS *
1686              **************************/
1687
1688             if (gmx_mm_any_lt(rsq31,rcutoff2))
1689             {
1690
1691             /* REACTION-FIELD ELECTROSTATICS */
1692             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
1693
1694             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
1695
1696             fscal            = felec;
1697
1698             fscal            = _mm_and_pd(fscal,cutoff_mask);
1699
1700             /* Calculate temporary vectorial force */
1701             tx               = _mm_mul_pd(fscal,dx31);
1702             ty               = _mm_mul_pd(fscal,dy31);
1703             tz               = _mm_mul_pd(fscal,dz31);
1704
1705             /* Update vectorial force */
1706             fix3             = _mm_add_pd(fix3,tx);
1707             fiy3             = _mm_add_pd(fiy3,ty);
1708             fiz3             = _mm_add_pd(fiz3,tz);
1709
1710             fjx1             = _mm_add_pd(fjx1,tx);
1711             fjy1             = _mm_add_pd(fjy1,ty);
1712             fjz1             = _mm_add_pd(fjz1,tz);
1713
1714             }
1715
1716             /**************************
1717              * CALCULATE INTERACTIONS *
1718              **************************/
1719
1720             if (gmx_mm_any_lt(rsq32,rcutoff2))
1721             {
1722
1723             /* REACTION-FIELD ELECTROSTATICS */
1724             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
1725
1726             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
1727
1728             fscal            = felec;
1729
1730             fscal            = _mm_and_pd(fscal,cutoff_mask);
1731
1732             /* Calculate temporary vectorial force */
1733             tx               = _mm_mul_pd(fscal,dx32);
1734             ty               = _mm_mul_pd(fscal,dy32);
1735             tz               = _mm_mul_pd(fscal,dz32);
1736
1737             /* Update vectorial force */
1738             fix3             = _mm_add_pd(fix3,tx);
1739             fiy3             = _mm_add_pd(fiy3,ty);
1740             fiz3             = _mm_add_pd(fiz3,tz);
1741
1742             fjx2             = _mm_add_pd(fjx2,tx);
1743             fjy2             = _mm_add_pd(fjy2,ty);
1744             fjz2             = _mm_add_pd(fjz2,tz);
1745
1746             }
1747
1748             /**************************
1749              * CALCULATE INTERACTIONS *
1750              **************************/
1751
1752             if (gmx_mm_any_lt(rsq33,rcutoff2))
1753             {
1754
1755             /* REACTION-FIELD ELECTROSTATICS */
1756             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
1757
1758             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
1759
1760             fscal            = felec;
1761
1762             fscal            = _mm_and_pd(fscal,cutoff_mask);
1763
1764             /* Calculate temporary vectorial force */
1765             tx               = _mm_mul_pd(fscal,dx33);
1766             ty               = _mm_mul_pd(fscal,dy33);
1767             tz               = _mm_mul_pd(fscal,dz33);
1768
1769             /* Update vectorial force */
1770             fix3             = _mm_add_pd(fix3,tx);
1771             fiy3             = _mm_add_pd(fiy3,ty);
1772             fiz3             = _mm_add_pd(fiz3,tz);
1773
1774             fjx3             = _mm_add_pd(fjx3,tx);
1775             fjy3             = _mm_add_pd(fjy3,ty);
1776             fjz3             = _mm_add_pd(fjz3,tz);
1777
1778             }
1779
1780             gmx_mm_decrement_4rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1781
1782             /* Inner loop uses 303 flops */
1783         }
1784
1785         if(jidx<j_index_end)
1786         {
1787
1788             jnrA             = jjnr[jidx];
1789             j_coord_offsetA  = DIM*jnrA;
1790
1791             /* load j atom coordinates */
1792             gmx_mm_load_4rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
1793                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1794                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1795
1796             /* Calculate displacement vector */
1797             dx00             = _mm_sub_pd(ix0,jx0);
1798             dy00             = _mm_sub_pd(iy0,jy0);
1799             dz00             = _mm_sub_pd(iz0,jz0);
1800             dx11             = _mm_sub_pd(ix1,jx1);
1801             dy11             = _mm_sub_pd(iy1,jy1);
1802             dz11             = _mm_sub_pd(iz1,jz1);
1803             dx12             = _mm_sub_pd(ix1,jx2);
1804             dy12             = _mm_sub_pd(iy1,jy2);
1805             dz12             = _mm_sub_pd(iz1,jz2);
1806             dx13             = _mm_sub_pd(ix1,jx3);
1807             dy13             = _mm_sub_pd(iy1,jy3);
1808             dz13             = _mm_sub_pd(iz1,jz3);
1809             dx21             = _mm_sub_pd(ix2,jx1);
1810             dy21             = _mm_sub_pd(iy2,jy1);
1811             dz21             = _mm_sub_pd(iz2,jz1);
1812             dx22             = _mm_sub_pd(ix2,jx2);
1813             dy22             = _mm_sub_pd(iy2,jy2);
1814             dz22             = _mm_sub_pd(iz2,jz2);
1815             dx23             = _mm_sub_pd(ix2,jx3);
1816             dy23             = _mm_sub_pd(iy2,jy3);
1817             dz23             = _mm_sub_pd(iz2,jz3);
1818             dx31             = _mm_sub_pd(ix3,jx1);
1819             dy31             = _mm_sub_pd(iy3,jy1);
1820             dz31             = _mm_sub_pd(iz3,jz1);
1821             dx32             = _mm_sub_pd(ix3,jx2);
1822             dy32             = _mm_sub_pd(iy3,jy2);
1823             dz32             = _mm_sub_pd(iz3,jz2);
1824             dx33             = _mm_sub_pd(ix3,jx3);
1825             dy33             = _mm_sub_pd(iy3,jy3);
1826             dz33             = _mm_sub_pd(iz3,jz3);
1827
1828             /* Calculate squared distance and things based on it */
1829             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1830             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1831             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1832             rsq13            = gmx_mm_calc_rsq_pd(dx13,dy13,dz13);
1833             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1834             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1835             rsq23            = gmx_mm_calc_rsq_pd(dx23,dy23,dz23);
1836             rsq31            = gmx_mm_calc_rsq_pd(dx31,dy31,dz31);
1837             rsq32            = gmx_mm_calc_rsq_pd(dx32,dy32,dz32);
1838             rsq33            = gmx_mm_calc_rsq_pd(dx33,dy33,dz33);
1839
1840             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1841             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1842             rinv13           = gmx_mm_invsqrt_pd(rsq13);
1843             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1844             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1845             rinv23           = gmx_mm_invsqrt_pd(rsq23);
1846             rinv31           = gmx_mm_invsqrt_pd(rsq31);
1847             rinv32           = gmx_mm_invsqrt_pd(rsq32);
1848             rinv33           = gmx_mm_invsqrt_pd(rsq33);
1849
1850             rinvsq00         = gmx_mm_inv_pd(rsq00);
1851             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1852             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1853             rinvsq13         = _mm_mul_pd(rinv13,rinv13);
1854             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1855             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1856             rinvsq23         = _mm_mul_pd(rinv23,rinv23);
1857             rinvsq31         = _mm_mul_pd(rinv31,rinv31);
1858             rinvsq32         = _mm_mul_pd(rinv32,rinv32);
1859             rinvsq33         = _mm_mul_pd(rinv33,rinv33);
1860
1861             fjx0             = _mm_setzero_pd();
1862             fjy0             = _mm_setzero_pd();
1863             fjz0             = _mm_setzero_pd();
1864             fjx1             = _mm_setzero_pd();
1865             fjy1             = _mm_setzero_pd();
1866             fjz1             = _mm_setzero_pd();
1867             fjx2             = _mm_setzero_pd();
1868             fjy2             = _mm_setzero_pd();
1869             fjz2             = _mm_setzero_pd();
1870             fjx3             = _mm_setzero_pd();
1871             fjy3             = _mm_setzero_pd();
1872             fjz3             = _mm_setzero_pd();
1873
1874             /**************************
1875              * CALCULATE INTERACTIONS *
1876              **************************/
1877
1878             if (gmx_mm_any_lt(rsq00,rcutoff2))
1879             {
1880
1881             /* LENNARD-JONES DISPERSION/REPULSION */
1882
1883             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1884             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1885
1886             cutoff_mask      = _mm_cmplt_pd(rsq00,rcutoff2);
1887
1888             fscal            = fvdw;
1889
1890             fscal            = _mm_and_pd(fscal,cutoff_mask);
1891
1892             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1893
1894             /* Calculate temporary vectorial force */
1895             tx               = _mm_mul_pd(fscal,dx00);
1896             ty               = _mm_mul_pd(fscal,dy00);
1897             tz               = _mm_mul_pd(fscal,dz00);
1898
1899             /* Update vectorial force */
1900             fix0             = _mm_add_pd(fix0,tx);
1901             fiy0             = _mm_add_pd(fiy0,ty);
1902             fiz0             = _mm_add_pd(fiz0,tz);
1903
1904             fjx0             = _mm_add_pd(fjx0,tx);
1905             fjy0             = _mm_add_pd(fjy0,ty);
1906             fjz0             = _mm_add_pd(fjz0,tz);
1907
1908             }
1909
1910             /**************************
1911              * CALCULATE INTERACTIONS *
1912              **************************/
1913
1914             if (gmx_mm_any_lt(rsq11,rcutoff2))
1915             {
1916
1917             /* REACTION-FIELD ELECTROSTATICS */
1918             felec            = _mm_mul_pd(qq11,_mm_sub_pd(_mm_mul_pd(rinv11,rinvsq11),krf2));
1919
1920             cutoff_mask      = _mm_cmplt_pd(rsq11,rcutoff2);
1921
1922             fscal            = felec;
1923
1924             fscal            = _mm_and_pd(fscal,cutoff_mask);
1925
1926             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1927
1928             /* Calculate temporary vectorial force */
1929             tx               = _mm_mul_pd(fscal,dx11);
1930             ty               = _mm_mul_pd(fscal,dy11);
1931             tz               = _mm_mul_pd(fscal,dz11);
1932
1933             /* Update vectorial force */
1934             fix1             = _mm_add_pd(fix1,tx);
1935             fiy1             = _mm_add_pd(fiy1,ty);
1936             fiz1             = _mm_add_pd(fiz1,tz);
1937
1938             fjx1             = _mm_add_pd(fjx1,tx);
1939             fjy1             = _mm_add_pd(fjy1,ty);
1940             fjz1             = _mm_add_pd(fjz1,tz);
1941
1942             }
1943
1944             /**************************
1945              * CALCULATE INTERACTIONS *
1946              **************************/
1947
1948             if (gmx_mm_any_lt(rsq12,rcutoff2))
1949             {
1950
1951             /* REACTION-FIELD ELECTROSTATICS */
1952             felec            = _mm_mul_pd(qq12,_mm_sub_pd(_mm_mul_pd(rinv12,rinvsq12),krf2));
1953
1954             cutoff_mask      = _mm_cmplt_pd(rsq12,rcutoff2);
1955
1956             fscal            = felec;
1957
1958             fscal            = _mm_and_pd(fscal,cutoff_mask);
1959
1960             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1961
1962             /* Calculate temporary vectorial force */
1963             tx               = _mm_mul_pd(fscal,dx12);
1964             ty               = _mm_mul_pd(fscal,dy12);
1965             tz               = _mm_mul_pd(fscal,dz12);
1966
1967             /* Update vectorial force */
1968             fix1             = _mm_add_pd(fix1,tx);
1969             fiy1             = _mm_add_pd(fiy1,ty);
1970             fiz1             = _mm_add_pd(fiz1,tz);
1971
1972             fjx2             = _mm_add_pd(fjx2,tx);
1973             fjy2             = _mm_add_pd(fjy2,ty);
1974             fjz2             = _mm_add_pd(fjz2,tz);
1975
1976             }
1977
1978             /**************************
1979              * CALCULATE INTERACTIONS *
1980              **************************/
1981
1982             if (gmx_mm_any_lt(rsq13,rcutoff2))
1983             {
1984
1985             /* REACTION-FIELD ELECTROSTATICS */
1986             felec            = _mm_mul_pd(qq13,_mm_sub_pd(_mm_mul_pd(rinv13,rinvsq13),krf2));
1987
1988             cutoff_mask      = _mm_cmplt_pd(rsq13,rcutoff2);
1989
1990             fscal            = felec;
1991
1992             fscal            = _mm_and_pd(fscal,cutoff_mask);
1993
1994             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1995
1996             /* Calculate temporary vectorial force */
1997             tx               = _mm_mul_pd(fscal,dx13);
1998             ty               = _mm_mul_pd(fscal,dy13);
1999             tz               = _mm_mul_pd(fscal,dz13);
2000
2001             /* Update vectorial force */
2002             fix1             = _mm_add_pd(fix1,tx);
2003             fiy1             = _mm_add_pd(fiy1,ty);
2004             fiz1             = _mm_add_pd(fiz1,tz);
2005
2006             fjx3             = _mm_add_pd(fjx3,tx);
2007             fjy3             = _mm_add_pd(fjy3,ty);
2008             fjz3             = _mm_add_pd(fjz3,tz);
2009
2010             }
2011
2012             /**************************
2013              * CALCULATE INTERACTIONS *
2014              **************************/
2015
2016             if (gmx_mm_any_lt(rsq21,rcutoff2))
2017             {
2018
2019             /* REACTION-FIELD ELECTROSTATICS */
2020             felec            = _mm_mul_pd(qq21,_mm_sub_pd(_mm_mul_pd(rinv21,rinvsq21),krf2));
2021
2022             cutoff_mask      = _mm_cmplt_pd(rsq21,rcutoff2);
2023
2024             fscal            = felec;
2025
2026             fscal            = _mm_and_pd(fscal,cutoff_mask);
2027
2028             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2029
2030             /* Calculate temporary vectorial force */
2031             tx               = _mm_mul_pd(fscal,dx21);
2032             ty               = _mm_mul_pd(fscal,dy21);
2033             tz               = _mm_mul_pd(fscal,dz21);
2034
2035             /* Update vectorial force */
2036             fix2             = _mm_add_pd(fix2,tx);
2037             fiy2             = _mm_add_pd(fiy2,ty);
2038             fiz2             = _mm_add_pd(fiz2,tz);
2039
2040             fjx1             = _mm_add_pd(fjx1,tx);
2041             fjy1             = _mm_add_pd(fjy1,ty);
2042             fjz1             = _mm_add_pd(fjz1,tz);
2043
2044             }
2045
2046             /**************************
2047              * CALCULATE INTERACTIONS *
2048              **************************/
2049
2050             if (gmx_mm_any_lt(rsq22,rcutoff2))
2051             {
2052
2053             /* REACTION-FIELD ELECTROSTATICS */
2054             felec            = _mm_mul_pd(qq22,_mm_sub_pd(_mm_mul_pd(rinv22,rinvsq22),krf2));
2055
2056             cutoff_mask      = _mm_cmplt_pd(rsq22,rcutoff2);
2057
2058             fscal            = felec;
2059
2060             fscal            = _mm_and_pd(fscal,cutoff_mask);
2061
2062             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2063
2064             /* Calculate temporary vectorial force */
2065             tx               = _mm_mul_pd(fscal,dx22);
2066             ty               = _mm_mul_pd(fscal,dy22);
2067             tz               = _mm_mul_pd(fscal,dz22);
2068
2069             /* Update vectorial force */
2070             fix2             = _mm_add_pd(fix2,tx);
2071             fiy2             = _mm_add_pd(fiy2,ty);
2072             fiz2             = _mm_add_pd(fiz2,tz);
2073
2074             fjx2             = _mm_add_pd(fjx2,tx);
2075             fjy2             = _mm_add_pd(fjy2,ty);
2076             fjz2             = _mm_add_pd(fjz2,tz);
2077
2078             }
2079
2080             /**************************
2081              * CALCULATE INTERACTIONS *
2082              **************************/
2083
2084             if (gmx_mm_any_lt(rsq23,rcutoff2))
2085             {
2086
2087             /* REACTION-FIELD ELECTROSTATICS */
2088             felec            = _mm_mul_pd(qq23,_mm_sub_pd(_mm_mul_pd(rinv23,rinvsq23),krf2));
2089
2090             cutoff_mask      = _mm_cmplt_pd(rsq23,rcutoff2);
2091
2092             fscal            = felec;
2093
2094             fscal            = _mm_and_pd(fscal,cutoff_mask);
2095
2096             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2097
2098             /* Calculate temporary vectorial force */
2099             tx               = _mm_mul_pd(fscal,dx23);
2100             ty               = _mm_mul_pd(fscal,dy23);
2101             tz               = _mm_mul_pd(fscal,dz23);
2102
2103             /* Update vectorial force */
2104             fix2             = _mm_add_pd(fix2,tx);
2105             fiy2             = _mm_add_pd(fiy2,ty);
2106             fiz2             = _mm_add_pd(fiz2,tz);
2107
2108             fjx3             = _mm_add_pd(fjx3,tx);
2109             fjy3             = _mm_add_pd(fjy3,ty);
2110             fjz3             = _mm_add_pd(fjz3,tz);
2111
2112             }
2113
2114             /**************************
2115              * CALCULATE INTERACTIONS *
2116              **************************/
2117
2118             if (gmx_mm_any_lt(rsq31,rcutoff2))
2119             {
2120
2121             /* REACTION-FIELD ELECTROSTATICS */
2122             felec            = _mm_mul_pd(qq31,_mm_sub_pd(_mm_mul_pd(rinv31,rinvsq31),krf2));
2123
2124             cutoff_mask      = _mm_cmplt_pd(rsq31,rcutoff2);
2125
2126             fscal            = felec;
2127
2128             fscal            = _mm_and_pd(fscal,cutoff_mask);
2129
2130             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2131
2132             /* Calculate temporary vectorial force */
2133             tx               = _mm_mul_pd(fscal,dx31);
2134             ty               = _mm_mul_pd(fscal,dy31);
2135             tz               = _mm_mul_pd(fscal,dz31);
2136
2137             /* Update vectorial force */
2138             fix3             = _mm_add_pd(fix3,tx);
2139             fiy3             = _mm_add_pd(fiy3,ty);
2140             fiz3             = _mm_add_pd(fiz3,tz);
2141
2142             fjx1             = _mm_add_pd(fjx1,tx);
2143             fjy1             = _mm_add_pd(fjy1,ty);
2144             fjz1             = _mm_add_pd(fjz1,tz);
2145
2146             }
2147
2148             /**************************
2149              * CALCULATE INTERACTIONS *
2150              **************************/
2151
2152             if (gmx_mm_any_lt(rsq32,rcutoff2))
2153             {
2154
2155             /* REACTION-FIELD ELECTROSTATICS */
2156             felec            = _mm_mul_pd(qq32,_mm_sub_pd(_mm_mul_pd(rinv32,rinvsq32),krf2));
2157
2158             cutoff_mask      = _mm_cmplt_pd(rsq32,rcutoff2);
2159
2160             fscal            = felec;
2161
2162             fscal            = _mm_and_pd(fscal,cutoff_mask);
2163
2164             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2165
2166             /* Calculate temporary vectorial force */
2167             tx               = _mm_mul_pd(fscal,dx32);
2168             ty               = _mm_mul_pd(fscal,dy32);
2169             tz               = _mm_mul_pd(fscal,dz32);
2170
2171             /* Update vectorial force */
2172             fix3             = _mm_add_pd(fix3,tx);
2173             fiy3             = _mm_add_pd(fiy3,ty);
2174             fiz3             = _mm_add_pd(fiz3,tz);
2175
2176             fjx2             = _mm_add_pd(fjx2,tx);
2177             fjy2             = _mm_add_pd(fjy2,ty);
2178             fjz2             = _mm_add_pd(fjz2,tz);
2179
2180             }
2181
2182             /**************************
2183              * CALCULATE INTERACTIONS *
2184              **************************/
2185
2186             if (gmx_mm_any_lt(rsq33,rcutoff2))
2187             {
2188
2189             /* REACTION-FIELD ELECTROSTATICS */
2190             felec            = _mm_mul_pd(qq33,_mm_sub_pd(_mm_mul_pd(rinv33,rinvsq33),krf2));
2191
2192             cutoff_mask      = _mm_cmplt_pd(rsq33,rcutoff2);
2193
2194             fscal            = felec;
2195
2196             fscal            = _mm_and_pd(fscal,cutoff_mask);
2197
2198             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
2199
2200             /* Calculate temporary vectorial force */
2201             tx               = _mm_mul_pd(fscal,dx33);
2202             ty               = _mm_mul_pd(fscal,dy33);
2203             tz               = _mm_mul_pd(fscal,dz33);
2204
2205             /* Update vectorial force */
2206             fix3             = _mm_add_pd(fix3,tx);
2207             fiy3             = _mm_add_pd(fiy3,ty);
2208             fiz3             = _mm_add_pd(fiz3,tz);
2209
2210             fjx3             = _mm_add_pd(fjx3,tx);
2211             fjy3             = _mm_add_pd(fjy3,ty);
2212             fjz3             = _mm_add_pd(fjz3,tz);
2213
2214             }
2215
2216             gmx_mm_decrement_4rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2217
2218             /* Inner loop uses 303 flops */
2219         }
2220
2221         /* End of innermost loop */
2222
2223         gmx_mm_update_iforce_4atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2224                                               f+i_coord_offset,fshift+i_shift_offset);
2225
2226         /* Increment number of inner iterations */
2227         inneriter                  += j_index_end - j_index_start;
2228
2229         /* Outer loop uses 24 flops */
2230     }
2231
2232     /* Increment number of outer iterations */
2233     outeriter        += nri;
2234
2235     /* Update outer/inner flops */
2236
2237     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);
2238 }