Remove all unnecessary HAVE_CONFIG_H
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sse2_double / nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sse2_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sse2_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "gromacs/simd/math_x86_sse2_double.h"
48 #include "kernelutil_x86_sse2_double.h"
49
50 /*
51  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double
52  * Electrostatics interaction: Coulomb
53  * VdW interaction:            LennardJones
54  * Geometry:                   Water3-Water3
55  * Calculate force/pot:        PotentialAndForce
56  */
57 void
58 nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse2_double
59                     (t_nblist                    * gmx_restrict       nlist,
60                      rvec                        * gmx_restrict          xx,
61                      rvec                        * gmx_restrict          ff,
62                      t_forcerec                  * gmx_restrict          fr,
63                      t_mdatoms                   * gmx_restrict     mdatoms,
64                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
65                      t_nrnb                      * gmx_restrict        nrnb)
66 {
67     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
68      * just 0 for non-waters.
69      * Suffixes A,B refer to j loop unrolling done with SSE double precision, i.e. for the two different
70      * jnr indices corresponding to data put in the two positions in the SIMD register.
71      */
72     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
73     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74     int              jnrA,jnrB;
75     int              j_coord_offsetA,j_coord_offsetB;
76     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
77     real             rcutoff_scalar;
78     real             *shiftvec,*fshift,*x,*f;
79     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80     int              vdwioffset0;
81     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82     int              vdwioffset1;
83     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84     int              vdwioffset2;
85     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
86     int              vdwjidx0A,vdwjidx0B;
87     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
88     int              vdwjidx1A,vdwjidx1B;
89     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
90     int              vdwjidx2A,vdwjidx2B;
91     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
92     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
93     __m128d          dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
94     __m128d          dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
95     __m128d          dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
96     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
97     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
98     __m128d          dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
99     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
100     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
101     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
102     real             *charge;
103     int              nvdwtype;
104     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
105     int              *vdwtype;
106     real             *vdwparam;
107     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
108     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
109     __m128d          dummy_mask,cutoff_mask;
110     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
111     __m128d          one     = _mm_set1_pd(1.0);
112     __m128d          two     = _mm_set1_pd(2.0);
113     x                = xx[0];
114     f                = ff[0];
115
116     nri              = nlist->nri;
117     iinr             = nlist->iinr;
118     jindex           = nlist->jindex;
119     jjnr             = nlist->jjnr;
120     shiftidx         = nlist->shift;
121     gid              = nlist->gid;
122     shiftvec         = fr->shift_vec[0];
123     fshift           = fr->fshift[0];
124     facel            = _mm_set1_pd(fr->epsfac);
125     charge           = mdatoms->chargeA;
126     nvdwtype         = fr->ntype;
127     vdwparam         = fr->nbfp;
128     vdwtype          = mdatoms->typeA;
129
130     /* Setup water-specific parameters */
131     inr              = nlist->iinr[0];
132     iq0              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+0]));
133     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
134     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
135     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
136
137     jq0              = _mm_set1_pd(charge[inr+0]);
138     jq1              = _mm_set1_pd(charge[inr+1]);
139     jq2              = _mm_set1_pd(charge[inr+2]);
140     vdwjidx0A        = 2*vdwtype[inr+0];
141     qq00             = _mm_mul_pd(iq0,jq0);
142     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
143     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
144     qq01             = _mm_mul_pd(iq0,jq1);
145     qq02             = _mm_mul_pd(iq0,jq2);
146     qq10             = _mm_mul_pd(iq1,jq0);
147     qq11             = _mm_mul_pd(iq1,jq1);
148     qq12             = _mm_mul_pd(iq1,jq2);
149     qq20             = _mm_mul_pd(iq2,jq0);
150     qq21             = _mm_mul_pd(iq2,jq1);
151     qq22             = _mm_mul_pd(iq2,jq2);
152
153     /* Avoid stupid compiler warnings */
154     jnrA = jnrB = 0;
155     j_coord_offsetA = 0;
156     j_coord_offsetB = 0;
157
158     outeriter        = 0;
159     inneriter        = 0;
160
161     /* Start outer loop over neighborlists */
162     for(iidx=0; iidx<nri; iidx++)
163     {
164         /* Load shift vector for this list */
165         i_shift_offset   = DIM*shiftidx[iidx];
166
167         /* Load limits for loop over neighbors */
168         j_index_start    = jindex[iidx];
169         j_index_end      = jindex[iidx+1];
170
171         /* Get outer coordinate index */
172         inr              = iinr[iidx];
173         i_coord_offset   = DIM*inr;
174
175         /* Load i particle coords and add shift vector */
176         gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
177                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
178
179         fix0             = _mm_setzero_pd();
180         fiy0             = _mm_setzero_pd();
181         fiz0             = _mm_setzero_pd();
182         fix1             = _mm_setzero_pd();
183         fiy1             = _mm_setzero_pd();
184         fiz1             = _mm_setzero_pd();
185         fix2             = _mm_setzero_pd();
186         fiy2             = _mm_setzero_pd();
187         fiz2             = _mm_setzero_pd();
188
189         /* Reset potential sums */
190         velecsum         = _mm_setzero_pd();
191         vvdwsum          = _mm_setzero_pd();
192
193         /* Start inner kernel loop */
194         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
195         {
196
197             /* Get j neighbor index, and coordinate index */
198             jnrA             = jjnr[jidx];
199             jnrB             = jjnr[jidx+1];
200             j_coord_offsetA  = DIM*jnrA;
201             j_coord_offsetB  = DIM*jnrB;
202
203             /* load j atom coordinates */
204             gmx_mm_load_3rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
205                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
206
207             /* Calculate displacement vector */
208             dx00             = _mm_sub_pd(ix0,jx0);
209             dy00             = _mm_sub_pd(iy0,jy0);
210             dz00             = _mm_sub_pd(iz0,jz0);
211             dx01             = _mm_sub_pd(ix0,jx1);
212             dy01             = _mm_sub_pd(iy0,jy1);
213             dz01             = _mm_sub_pd(iz0,jz1);
214             dx02             = _mm_sub_pd(ix0,jx2);
215             dy02             = _mm_sub_pd(iy0,jy2);
216             dz02             = _mm_sub_pd(iz0,jz2);
217             dx10             = _mm_sub_pd(ix1,jx0);
218             dy10             = _mm_sub_pd(iy1,jy0);
219             dz10             = _mm_sub_pd(iz1,jz0);
220             dx11             = _mm_sub_pd(ix1,jx1);
221             dy11             = _mm_sub_pd(iy1,jy1);
222             dz11             = _mm_sub_pd(iz1,jz1);
223             dx12             = _mm_sub_pd(ix1,jx2);
224             dy12             = _mm_sub_pd(iy1,jy2);
225             dz12             = _mm_sub_pd(iz1,jz2);
226             dx20             = _mm_sub_pd(ix2,jx0);
227             dy20             = _mm_sub_pd(iy2,jy0);
228             dz20             = _mm_sub_pd(iz2,jz0);
229             dx21             = _mm_sub_pd(ix2,jx1);
230             dy21             = _mm_sub_pd(iy2,jy1);
231             dz21             = _mm_sub_pd(iz2,jz1);
232             dx22             = _mm_sub_pd(ix2,jx2);
233             dy22             = _mm_sub_pd(iy2,jy2);
234             dz22             = _mm_sub_pd(iz2,jz2);
235
236             /* Calculate squared distance and things based on it */
237             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
238             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
239             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
240             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
241             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
242             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
243             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
244             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
245             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
246
247             rinv00           = gmx_mm_invsqrt_pd(rsq00);
248             rinv01           = gmx_mm_invsqrt_pd(rsq01);
249             rinv02           = gmx_mm_invsqrt_pd(rsq02);
250             rinv10           = gmx_mm_invsqrt_pd(rsq10);
251             rinv11           = gmx_mm_invsqrt_pd(rsq11);
252             rinv12           = gmx_mm_invsqrt_pd(rsq12);
253             rinv20           = gmx_mm_invsqrt_pd(rsq20);
254             rinv21           = gmx_mm_invsqrt_pd(rsq21);
255             rinv22           = gmx_mm_invsqrt_pd(rsq22);
256
257             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
258             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
259             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
260             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
261             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
262             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
263             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
264             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
265             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
266
267             fjx0             = _mm_setzero_pd();
268             fjy0             = _mm_setzero_pd();
269             fjz0             = _mm_setzero_pd();
270             fjx1             = _mm_setzero_pd();
271             fjy1             = _mm_setzero_pd();
272             fjz1             = _mm_setzero_pd();
273             fjx2             = _mm_setzero_pd();
274             fjy2             = _mm_setzero_pd();
275             fjz2             = _mm_setzero_pd();
276
277             /**************************
278              * CALCULATE INTERACTIONS *
279              **************************/
280
281             /* COULOMB ELECTROSTATICS */
282             velec            = _mm_mul_pd(qq00,rinv00);
283             felec            = _mm_mul_pd(velec,rinvsq00);
284
285             /* LENNARD-JONES DISPERSION/REPULSION */
286
287             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
288             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
289             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
290             vvdw             = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
291             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
292
293             /* Update potential sum for this i atom from the interaction with this j atom. */
294             velecsum         = _mm_add_pd(velecsum,velec);
295             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
296
297             fscal            = _mm_add_pd(felec,fvdw);
298
299             /* Calculate temporary vectorial force */
300             tx               = _mm_mul_pd(fscal,dx00);
301             ty               = _mm_mul_pd(fscal,dy00);
302             tz               = _mm_mul_pd(fscal,dz00);
303
304             /* Update vectorial force */
305             fix0             = _mm_add_pd(fix0,tx);
306             fiy0             = _mm_add_pd(fiy0,ty);
307             fiz0             = _mm_add_pd(fiz0,tz);
308
309             fjx0             = _mm_add_pd(fjx0,tx);
310             fjy0             = _mm_add_pd(fjy0,ty);
311             fjz0             = _mm_add_pd(fjz0,tz);
312
313             /**************************
314              * CALCULATE INTERACTIONS *
315              **************************/
316
317             /* COULOMB ELECTROSTATICS */
318             velec            = _mm_mul_pd(qq01,rinv01);
319             felec            = _mm_mul_pd(velec,rinvsq01);
320
321             /* Update potential sum for this i atom from the interaction with this j atom. */
322             velecsum         = _mm_add_pd(velecsum,velec);
323
324             fscal            = felec;
325
326             /* Calculate temporary vectorial force */
327             tx               = _mm_mul_pd(fscal,dx01);
328             ty               = _mm_mul_pd(fscal,dy01);
329             tz               = _mm_mul_pd(fscal,dz01);
330
331             /* Update vectorial force */
332             fix0             = _mm_add_pd(fix0,tx);
333             fiy0             = _mm_add_pd(fiy0,ty);
334             fiz0             = _mm_add_pd(fiz0,tz);
335
336             fjx1             = _mm_add_pd(fjx1,tx);
337             fjy1             = _mm_add_pd(fjy1,ty);
338             fjz1             = _mm_add_pd(fjz1,tz);
339
340             /**************************
341              * CALCULATE INTERACTIONS *
342              **************************/
343
344             /* COULOMB ELECTROSTATICS */
345             velec            = _mm_mul_pd(qq02,rinv02);
346             felec            = _mm_mul_pd(velec,rinvsq02);
347
348             /* Update potential sum for this i atom from the interaction with this j atom. */
349             velecsum         = _mm_add_pd(velecsum,velec);
350
351             fscal            = felec;
352
353             /* Calculate temporary vectorial force */
354             tx               = _mm_mul_pd(fscal,dx02);
355             ty               = _mm_mul_pd(fscal,dy02);
356             tz               = _mm_mul_pd(fscal,dz02);
357
358             /* Update vectorial force */
359             fix0             = _mm_add_pd(fix0,tx);
360             fiy0             = _mm_add_pd(fiy0,ty);
361             fiz0             = _mm_add_pd(fiz0,tz);
362
363             fjx2             = _mm_add_pd(fjx2,tx);
364             fjy2             = _mm_add_pd(fjy2,ty);
365             fjz2             = _mm_add_pd(fjz2,tz);
366
367             /**************************
368              * CALCULATE INTERACTIONS *
369              **************************/
370
371             /* COULOMB ELECTROSTATICS */
372             velec            = _mm_mul_pd(qq10,rinv10);
373             felec            = _mm_mul_pd(velec,rinvsq10);
374
375             /* Update potential sum for this i atom from the interaction with this j atom. */
376             velecsum         = _mm_add_pd(velecsum,velec);
377
378             fscal            = felec;
379
380             /* Calculate temporary vectorial force */
381             tx               = _mm_mul_pd(fscal,dx10);
382             ty               = _mm_mul_pd(fscal,dy10);
383             tz               = _mm_mul_pd(fscal,dz10);
384
385             /* Update vectorial force */
386             fix1             = _mm_add_pd(fix1,tx);
387             fiy1             = _mm_add_pd(fiy1,ty);
388             fiz1             = _mm_add_pd(fiz1,tz);
389
390             fjx0             = _mm_add_pd(fjx0,tx);
391             fjy0             = _mm_add_pd(fjy0,ty);
392             fjz0             = _mm_add_pd(fjz0,tz);
393
394             /**************************
395              * CALCULATE INTERACTIONS *
396              **************************/
397
398             /* COULOMB ELECTROSTATICS */
399             velec            = _mm_mul_pd(qq11,rinv11);
400             felec            = _mm_mul_pd(velec,rinvsq11);
401
402             /* Update potential sum for this i atom from the interaction with this j atom. */
403             velecsum         = _mm_add_pd(velecsum,velec);
404
405             fscal            = felec;
406
407             /* Calculate temporary vectorial force */
408             tx               = _mm_mul_pd(fscal,dx11);
409             ty               = _mm_mul_pd(fscal,dy11);
410             tz               = _mm_mul_pd(fscal,dz11);
411
412             /* Update vectorial force */
413             fix1             = _mm_add_pd(fix1,tx);
414             fiy1             = _mm_add_pd(fiy1,ty);
415             fiz1             = _mm_add_pd(fiz1,tz);
416
417             fjx1             = _mm_add_pd(fjx1,tx);
418             fjy1             = _mm_add_pd(fjy1,ty);
419             fjz1             = _mm_add_pd(fjz1,tz);
420
421             /**************************
422              * CALCULATE INTERACTIONS *
423              **************************/
424
425             /* COULOMB ELECTROSTATICS */
426             velec            = _mm_mul_pd(qq12,rinv12);
427             felec            = _mm_mul_pd(velec,rinvsq12);
428
429             /* Update potential sum for this i atom from the interaction with this j atom. */
430             velecsum         = _mm_add_pd(velecsum,velec);
431
432             fscal            = felec;
433
434             /* Calculate temporary vectorial force */
435             tx               = _mm_mul_pd(fscal,dx12);
436             ty               = _mm_mul_pd(fscal,dy12);
437             tz               = _mm_mul_pd(fscal,dz12);
438
439             /* Update vectorial force */
440             fix1             = _mm_add_pd(fix1,tx);
441             fiy1             = _mm_add_pd(fiy1,ty);
442             fiz1             = _mm_add_pd(fiz1,tz);
443
444             fjx2             = _mm_add_pd(fjx2,tx);
445             fjy2             = _mm_add_pd(fjy2,ty);
446             fjz2             = _mm_add_pd(fjz2,tz);
447
448             /**************************
449              * CALCULATE INTERACTIONS *
450              **************************/
451
452             /* COULOMB ELECTROSTATICS */
453             velec            = _mm_mul_pd(qq20,rinv20);
454             felec            = _mm_mul_pd(velec,rinvsq20);
455
456             /* Update potential sum for this i atom from the interaction with this j atom. */
457             velecsum         = _mm_add_pd(velecsum,velec);
458
459             fscal            = felec;
460
461             /* Calculate temporary vectorial force */
462             tx               = _mm_mul_pd(fscal,dx20);
463             ty               = _mm_mul_pd(fscal,dy20);
464             tz               = _mm_mul_pd(fscal,dz20);
465
466             /* Update vectorial force */
467             fix2             = _mm_add_pd(fix2,tx);
468             fiy2             = _mm_add_pd(fiy2,ty);
469             fiz2             = _mm_add_pd(fiz2,tz);
470
471             fjx0             = _mm_add_pd(fjx0,tx);
472             fjy0             = _mm_add_pd(fjy0,ty);
473             fjz0             = _mm_add_pd(fjz0,tz);
474
475             /**************************
476              * CALCULATE INTERACTIONS *
477              **************************/
478
479             /* COULOMB ELECTROSTATICS */
480             velec            = _mm_mul_pd(qq21,rinv21);
481             felec            = _mm_mul_pd(velec,rinvsq21);
482
483             /* Update potential sum for this i atom from the interaction with this j atom. */
484             velecsum         = _mm_add_pd(velecsum,velec);
485
486             fscal            = felec;
487
488             /* Calculate temporary vectorial force */
489             tx               = _mm_mul_pd(fscal,dx21);
490             ty               = _mm_mul_pd(fscal,dy21);
491             tz               = _mm_mul_pd(fscal,dz21);
492
493             /* Update vectorial force */
494             fix2             = _mm_add_pd(fix2,tx);
495             fiy2             = _mm_add_pd(fiy2,ty);
496             fiz2             = _mm_add_pd(fiz2,tz);
497
498             fjx1             = _mm_add_pd(fjx1,tx);
499             fjy1             = _mm_add_pd(fjy1,ty);
500             fjz1             = _mm_add_pd(fjz1,tz);
501
502             /**************************
503              * CALCULATE INTERACTIONS *
504              **************************/
505
506             /* COULOMB ELECTROSTATICS */
507             velec            = _mm_mul_pd(qq22,rinv22);
508             felec            = _mm_mul_pd(velec,rinvsq22);
509
510             /* Update potential sum for this i atom from the interaction with this j atom. */
511             velecsum         = _mm_add_pd(velecsum,velec);
512
513             fscal            = felec;
514
515             /* Calculate temporary vectorial force */
516             tx               = _mm_mul_pd(fscal,dx22);
517             ty               = _mm_mul_pd(fscal,dy22);
518             tz               = _mm_mul_pd(fscal,dz22);
519
520             /* Update vectorial force */
521             fix2             = _mm_add_pd(fix2,tx);
522             fiy2             = _mm_add_pd(fiy2,ty);
523             fiz2             = _mm_add_pd(fiz2,tz);
524
525             fjx2             = _mm_add_pd(fjx2,tx);
526             fjy2             = _mm_add_pd(fjy2,ty);
527             fjz2             = _mm_add_pd(fjz2,tz);
528
529             gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
530
531             /* Inner loop uses 264 flops */
532         }
533
534         if(jidx<j_index_end)
535         {
536
537             jnrA             = jjnr[jidx];
538             j_coord_offsetA  = DIM*jnrA;
539
540             /* load j atom coordinates */
541             gmx_mm_load_3rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
542                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
543
544             /* Calculate displacement vector */
545             dx00             = _mm_sub_pd(ix0,jx0);
546             dy00             = _mm_sub_pd(iy0,jy0);
547             dz00             = _mm_sub_pd(iz0,jz0);
548             dx01             = _mm_sub_pd(ix0,jx1);
549             dy01             = _mm_sub_pd(iy0,jy1);
550             dz01             = _mm_sub_pd(iz0,jz1);
551             dx02             = _mm_sub_pd(ix0,jx2);
552             dy02             = _mm_sub_pd(iy0,jy2);
553             dz02             = _mm_sub_pd(iz0,jz2);
554             dx10             = _mm_sub_pd(ix1,jx0);
555             dy10             = _mm_sub_pd(iy1,jy0);
556             dz10             = _mm_sub_pd(iz1,jz0);
557             dx11             = _mm_sub_pd(ix1,jx1);
558             dy11             = _mm_sub_pd(iy1,jy1);
559             dz11             = _mm_sub_pd(iz1,jz1);
560             dx12             = _mm_sub_pd(ix1,jx2);
561             dy12             = _mm_sub_pd(iy1,jy2);
562             dz12             = _mm_sub_pd(iz1,jz2);
563             dx20             = _mm_sub_pd(ix2,jx0);
564             dy20             = _mm_sub_pd(iy2,jy0);
565             dz20             = _mm_sub_pd(iz2,jz0);
566             dx21             = _mm_sub_pd(ix2,jx1);
567             dy21             = _mm_sub_pd(iy2,jy1);
568             dz21             = _mm_sub_pd(iz2,jz1);
569             dx22             = _mm_sub_pd(ix2,jx2);
570             dy22             = _mm_sub_pd(iy2,jy2);
571             dz22             = _mm_sub_pd(iz2,jz2);
572
573             /* Calculate squared distance and things based on it */
574             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
575             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
576             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
577             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
578             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
579             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
580             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
581             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
582             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
583
584             rinv00           = gmx_mm_invsqrt_pd(rsq00);
585             rinv01           = gmx_mm_invsqrt_pd(rsq01);
586             rinv02           = gmx_mm_invsqrt_pd(rsq02);
587             rinv10           = gmx_mm_invsqrt_pd(rsq10);
588             rinv11           = gmx_mm_invsqrt_pd(rsq11);
589             rinv12           = gmx_mm_invsqrt_pd(rsq12);
590             rinv20           = gmx_mm_invsqrt_pd(rsq20);
591             rinv21           = gmx_mm_invsqrt_pd(rsq21);
592             rinv22           = gmx_mm_invsqrt_pd(rsq22);
593
594             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
595             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
596             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
597             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
598             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
599             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
600             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
601             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
602             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
603
604             fjx0             = _mm_setzero_pd();
605             fjy0             = _mm_setzero_pd();
606             fjz0             = _mm_setzero_pd();
607             fjx1             = _mm_setzero_pd();
608             fjy1             = _mm_setzero_pd();
609             fjz1             = _mm_setzero_pd();
610             fjx2             = _mm_setzero_pd();
611             fjy2             = _mm_setzero_pd();
612             fjz2             = _mm_setzero_pd();
613
614             /**************************
615              * CALCULATE INTERACTIONS *
616              **************************/
617
618             /* COULOMB ELECTROSTATICS */
619             velec            = _mm_mul_pd(qq00,rinv00);
620             felec            = _mm_mul_pd(velec,rinvsq00);
621
622             /* LENNARD-JONES DISPERSION/REPULSION */
623
624             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
625             vvdw6            = _mm_mul_pd(c6_00,rinvsix);
626             vvdw12           = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
627             vvdw             = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
628             fvdw             = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
629
630             /* Update potential sum for this i atom from the interaction with this j atom. */
631             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
632             velecsum         = _mm_add_pd(velecsum,velec);
633             vvdw             = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
634             vvdwsum          = _mm_add_pd(vvdwsum,vvdw);
635
636             fscal            = _mm_add_pd(felec,fvdw);
637
638             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
639
640             /* Calculate temporary vectorial force */
641             tx               = _mm_mul_pd(fscal,dx00);
642             ty               = _mm_mul_pd(fscal,dy00);
643             tz               = _mm_mul_pd(fscal,dz00);
644
645             /* Update vectorial force */
646             fix0             = _mm_add_pd(fix0,tx);
647             fiy0             = _mm_add_pd(fiy0,ty);
648             fiz0             = _mm_add_pd(fiz0,tz);
649
650             fjx0             = _mm_add_pd(fjx0,tx);
651             fjy0             = _mm_add_pd(fjy0,ty);
652             fjz0             = _mm_add_pd(fjz0,tz);
653
654             /**************************
655              * CALCULATE INTERACTIONS *
656              **************************/
657
658             /* COULOMB ELECTROSTATICS */
659             velec            = _mm_mul_pd(qq01,rinv01);
660             felec            = _mm_mul_pd(velec,rinvsq01);
661
662             /* Update potential sum for this i atom from the interaction with this j atom. */
663             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
664             velecsum         = _mm_add_pd(velecsum,velec);
665
666             fscal            = felec;
667
668             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
669
670             /* Calculate temporary vectorial force */
671             tx               = _mm_mul_pd(fscal,dx01);
672             ty               = _mm_mul_pd(fscal,dy01);
673             tz               = _mm_mul_pd(fscal,dz01);
674
675             /* Update vectorial force */
676             fix0             = _mm_add_pd(fix0,tx);
677             fiy0             = _mm_add_pd(fiy0,ty);
678             fiz0             = _mm_add_pd(fiz0,tz);
679
680             fjx1             = _mm_add_pd(fjx1,tx);
681             fjy1             = _mm_add_pd(fjy1,ty);
682             fjz1             = _mm_add_pd(fjz1,tz);
683
684             /**************************
685              * CALCULATE INTERACTIONS *
686              **************************/
687
688             /* COULOMB ELECTROSTATICS */
689             velec            = _mm_mul_pd(qq02,rinv02);
690             felec            = _mm_mul_pd(velec,rinvsq02);
691
692             /* Update potential sum for this i atom from the interaction with this j atom. */
693             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
694             velecsum         = _mm_add_pd(velecsum,velec);
695
696             fscal            = felec;
697
698             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
699
700             /* Calculate temporary vectorial force */
701             tx               = _mm_mul_pd(fscal,dx02);
702             ty               = _mm_mul_pd(fscal,dy02);
703             tz               = _mm_mul_pd(fscal,dz02);
704
705             /* Update vectorial force */
706             fix0             = _mm_add_pd(fix0,tx);
707             fiy0             = _mm_add_pd(fiy0,ty);
708             fiz0             = _mm_add_pd(fiz0,tz);
709
710             fjx2             = _mm_add_pd(fjx2,tx);
711             fjy2             = _mm_add_pd(fjy2,ty);
712             fjz2             = _mm_add_pd(fjz2,tz);
713
714             /**************************
715              * CALCULATE INTERACTIONS *
716              **************************/
717
718             /* COULOMB ELECTROSTATICS */
719             velec            = _mm_mul_pd(qq10,rinv10);
720             felec            = _mm_mul_pd(velec,rinvsq10);
721
722             /* Update potential sum for this i atom from the interaction with this j atom. */
723             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
724             velecsum         = _mm_add_pd(velecsum,velec);
725
726             fscal            = felec;
727
728             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
729
730             /* Calculate temporary vectorial force */
731             tx               = _mm_mul_pd(fscal,dx10);
732             ty               = _mm_mul_pd(fscal,dy10);
733             tz               = _mm_mul_pd(fscal,dz10);
734
735             /* Update vectorial force */
736             fix1             = _mm_add_pd(fix1,tx);
737             fiy1             = _mm_add_pd(fiy1,ty);
738             fiz1             = _mm_add_pd(fiz1,tz);
739
740             fjx0             = _mm_add_pd(fjx0,tx);
741             fjy0             = _mm_add_pd(fjy0,ty);
742             fjz0             = _mm_add_pd(fjz0,tz);
743
744             /**************************
745              * CALCULATE INTERACTIONS *
746              **************************/
747
748             /* COULOMB ELECTROSTATICS */
749             velec            = _mm_mul_pd(qq11,rinv11);
750             felec            = _mm_mul_pd(velec,rinvsq11);
751
752             /* Update potential sum for this i atom from the interaction with this j atom. */
753             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
754             velecsum         = _mm_add_pd(velecsum,velec);
755
756             fscal            = felec;
757
758             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
759
760             /* Calculate temporary vectorial force */
761             tx               = _mm_mul_pd(fscal,dx11);
762             ty               = _mm_mul_pd(fscal,dy11);
763             tz               = _mm_mul_pd(fscal,dz11);
764
765             /* Update vectorial force */
766             fix1             = _mm_add_pd(fix1,tx);
767             fiy1             = _mm_add_pd(fiy1,ty);
768             fiz1             = _mm_add_pd(fiz1,tz);
769
770             fjx1             = _mm_add_pd(fjx1,tx);
771             fjy1             = _mm_add_pd(fjy1,ty);
772             fjz1             = _mm_add_pd(fjz1,tz);
773
774             /**************************
775              * CALCULATE INTERACTIONS *
776              **************************/
777
778             /* COULOMB ELECTROSTATICS */
779             velec            = _mm_mul_pd(qq12,rinv12);
780             felec            = _mm_mul_pd(velec,rinvsq12);
781
782             /* Update potential sum for this i atom from the interaction with this j atom. */
783             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
784             velecsum         = _mm_add_pd(velecsum,velec);
785
786             fscal            = felec;
787
788             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
789
790             /* Calculate temporary vectorial force */
791             tx               = _mm_mul_pd(fscal,dx12);
792             ty               = _mm_mul_pd(fscal,dy12);
793             tz               = _mm_mul_pd(fscal,dz12);
794
795             /* Update vectorial force */
796             fix1             = _mm_add_pd(fix1,tx);
797             fiy1             = _mm_add_pd(fiy1,ty);
798             fiz1             = _mm_add_pd(fiz1,tz);
799
800             fjx2             = _mm_add_pd(fjx2,tx);
801             fjy2             = _mm_add_pd(fjy2,ty);
802             fjz2             = _mm_add_pd(fjz2,tz);
803
804             /**************************
805              * CALCULATE INTERACTIONS *
806              **************************/
807
808             /* COULOMB ELECTROSTATICS */
809             velec            = _mm_mul_pd(qq20,rinv20);
810             felec            = _mm_mul_pd(velec,rinvsq20);
811
812             /* Update potential sum for this i atom from the interaction with this j atom. */
813             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
814             velecsum         = _mm_add_pd(velecsum,velec);
815
816             fscal            = felec;
817
818             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
819
820             /* Calculate temporary vectorial force */
821             tx               = _mm_mul_pd(fscal,dx20);
822             ty               = _mm_mul_pd(fscal,dy20);
823             tz               = _mm_mul_pd(fscal,dz20);
824
825             /* Update vectorial force */
826             fix2             = _mm_add_pd(fix2,tx);
827             fiy2             = _mm_add_pd(fiy2,ty);
828             fiz2             = _mm_add_pd(fiz2,tz);
829
830             fjx0             = _mm_add_pd(fjx0,tx);
831             fjy0             = _mm_add_pd(fjy0,ty);
832             fjz0             = _mm_add_pd(fjz0,tz);
833
834             /**************************
835              * CALCULATE INTERACTIONS *
836              **************************/
837
838             /* COULOMB ELECTROSTATICS */
839             velec            = _mm_mul_pd(qq21,rinv21);
840             felec            = _mm_mul_pd(velec,rinvsq21);
841
842             /* Update potential sum for this i atom from the interaction with this j atom. */
843             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
844             velecsum         = _mm_add_pd(velecsum,velec);
845
846             fscal            = felec;
847
848             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
849
850             /* Calculate temporary vectorial force */
851             tx               = _mm_mul_pd(fscal,dx21);
852             ty               = _mm_mul_pd(fscal,dy21);
853             tz               = _mm_mul_pd(fscal,dz21);
854
855             /* Update vectorial force */
856             fix2             = _mm_add_pd(fix2,tx);
857             fiy2             = _mm_add_pd(fiy2,ty);
858             fiz2             = _mm_add_pd(fiz2,tz);
859
860             fjx1             = _mm_add_pd(fjx1,tx);
861             fjy1             = _mm_add_pd(fjy1,ty);
862             fjz1             = _mm_add_pd(fjz1,tz);
863
864             /**************************
865              * CALCULATE INTERACTIONS *
866              **************************/
867
868             /* COULOMB ELECTROSTATICS */
869             velec            = _mm_mul_pd(qq22,rinv22);
870             felec            = _mm_mul_pd(velec,rinvsq22);
871
872             /* Update potential sum for this i atom from the interaction with this j atom. */
873             velec            = _mm_unpacklo_pd(velec,_mm_setzero_pd());
874             velecsum         = _mm_add_pd(velecsum,velec);
875
876             fscal            = felec;
877
878             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
879
880             /* Calculate temporary vectorial force */
881             tx               = _mm_mul_pd(fscal,dx22);
882             ty               = _mm_mul_pd(fscal,dy22);
883             tz               = _mm_mul_pd(fscal,dz22);
884
885             /* Update vectorial force */
886             fix2             = _mm_add_pd(fix2,tx);
887             fiy2             = _mm_add_pd(fiy2,ty);
888             fiz2             = _mm_add_pd(fiz2,tz);
889
890             fjx2             = _mm_add_pd(fjx2,tx);
891             fjy2             = _mm_add_pd(fjy2,ty);
892             fjz2             = _mm_add_pd(fjz2,tz);
893
894             gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
895
896             /* Inner loop uses 264 flops */
897         }
898
899         /* End of innermost loop */
900
901         gmx_mm_update_iforce_3atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
902                                               f+i_coord_offset,fshift+i_shift_offset);
903
904         ggid                        = gid[iidx];
905         /* Update potential energies */
906         gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
907         gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
908
909         /* Increment number of inner iterations */
910         inneriter                  += j_index_end - j_index_start;
911
912         /* Outer loop uses 20 flops */
913     }
914
915     /* Increment number of outer iterations */
916     outeriter        += nri;
917
918     /* Update outer/inner flops */
919
920     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*264);
921 }
922 /*
923  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double
924  * Electrostatics interaction: Coulomb
925  * VdW interaction:            LennardJones
926  * Geometry:                   Water3-Water3
927  * Calculate force/pot:        Force
928  */
929 void
930 nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse2_double
931                     (t_nblist                    * gmx_restrict       nlist,
932                      rvec                        * gmx_restrict          xx,
933                      rvec                        * gmx_restrict          ff,
934                      t_forcerec                  * gmx_restrict          fr,
935                      t_mdatoms                   * gmx_restrict     mdatoms,
936                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
937                      t_nrnb                      * gmx_restrict        nrnb)
938 {
939     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
940      * just 0 for non-waters.
941      * Suffixes A,B refer to j loop unrolling done with SSE double precision, i.e. the two different
942      * jnr indices whose data are placed in the two positions of the SIMD register.
943      */
944     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
945     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
946     int              jnrA,jnrB;
947     int              j_coord_offsetA,j_coord_offsetB;
948     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
949     real             rcutoff_scalar;
950     real             *shiftvec,*fshift,*x,*f;
951     __m128d          tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
952     int              vdwioffset0;
953     __m128d          ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
954     int              vdwioffset1;
955     __m128d          ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
956     int              vdwioffset2;
957     __m128d          ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
958     int              vdwjidx0A,vdwjidx0B;
959     __m128d          jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
960     int              vdwjidx1A,vdwjidx1B;
961     __m128d          jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
962     int              vdwjidx2A,vdwjidx2B;
963     __m128d          jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
964     __m128d          dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
965     __m128d          dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
966     __m128d          dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
967     __m128d          dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
968     __m128d          dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
969     __m128d          dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
970     __m128d          dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
971     __m128d          dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
972     __m128d          dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
973     __m128d          velec,felec,velecsum,facel,crf,krf,krf2;
974     real             *charge;
975     int              nvdwtype;
976     __m128d          rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
977     int              *vdwtype;
978     real             *vdwparam;
979     __m128d          one_sixth   = _mm_set1_pd(1.0/6.0);
980     __m128d          one_twelfth = _mm_set1_pd(1.0/12.0);
981     __m128d          dummy_mask,cutoff_mask;
982     __m128d          signbit   = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
983     __m128d          one     = _mm_set1_pd(1.0);
984     __m128d          two     = _mm_set1_pd(2.0);
985     x                = xx[0];
986     f                = ff[0];
987
988     nri              = nlist->nri;
989     iinr             = nlist->iinr;
990     jindex           = nlist->jindex;
991     jjnr             = nlist->jjnr;
992     shiftidx         = nlist->shift;
993     gid              = nlist->gid;
994     shiftvec         = fr->shift_vec[0];
995     fshift           = fr->fshift[0];
996     facel            = _mm_set1_pd(fr->epsfac);
997     charge           = mdatoms->chargeA;
998     nvdwtype         = fr->ntype;
999     vdwparam         = fr->nbfp;
1000     vdwtype          = mdatoms->typeA;
1001
1002     /* Setup water-specific parameters */
1003     inr              = nlist->iinr[0];
1004     iq0              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+0]));
1005     iq1              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
1006     iq2              = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
1007     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1008
1009     jq0              = _mm_set1_pd(charge[inr+0]);
1010     jq1              = _mm_set1_pd(charge[inr+1]);
1011     jq2              = _mm_set1_pd(charge[inr+2]);
1012     vdwjidx0A        = 2*vdwtype[inr+0];
1013     qq00             = _mm_mul_pd(iq0,jq0);
1014     c6_00            = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
1015     c12_00           = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
1016     qq01             = _mm_mul_pd(iq0,jq1);
1017     qq02             = _mm_mul_pd(iq0,jq2);
1018     qq10             = _mm_mul_pd(iq1,jq0);
1019     qq11             = _mm_mul_pd(iq1,jq1);
1020     qq12             = _mm_mul_pd(iq1,jq2);
1021     qq20             = _mm_mul_pd(iq2,jq0);
1022     qq21             = _mm_mul_pd(iq2,jq1);
1023     qq22             = _mm_mul_pd(iq2,jq2);
1024
1025     /* Avoid stupid compiler warnings */
1026     jnrA = jnrB = 0;
1027     j_coord_offsetA = 0;
1028     j_coord_offsetB = 0;
1029
1030     outeriter        = 0;
1031     inneriter        = 0;
1032
1033     /* Start outer loop over neighborlists */
1034     for(iidx=0; iidx<nri; iidx++)
1035     {
1036         /* Load shift vector for this list */
1037         i_shift_offset   = DIM*shiftidx[iidx];
1038
1039         /* Load limits for loop over neighbors */
1040         j_index_start    = jindex[iidx];
1041         j_index_end      = jindex[iidx+1];
1042
1043         /* Get outer coordinate index */
1044         inr              = iinr[iidx];
1045         i_coord_offset   = DIM*inr;
1046
1047         /* Load i particle coords and add shift vector */
1048         gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
1049                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1050
1051         fix0             = _mm_setzero_pd();
1052         fiy0             = _mm_setzero_pd();
1053         fiz0             = _mm_setzero_pd();
1054         fix1             = _mm_setzero_pd();
1055         fiy1             = _mm_setzero_pd();
1056         fiz1             = _mm_setzero_pd();
1057         fix2             = _mm_setzero_pd();
1058         fiy2             = _mm_setzero_pd();
1059         fiz2             = _mm_setzero_pd();
1060
1061         /* Start inner kernel loop */
1062         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1063         {
1064
1065             /* Get j neighbor index, and coordinate index */
1066             jnrA             = jjnr[jidx];
1067             jnrB             = jjnr[jidx+1];
1068             j_coord_offsetA  = DIM*jnrA;
1069             j_coord_offsetB  = DIM*jnrB;
1070
1071             /* load j atom coordinates */
1072             gmx_mm_load_3rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
1073                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1074
1075             /* Calculate displacement vector */
1076             dx00             = _mm_sub_pd(ix0,jx0);
1077             dy00             = _mm_sub_pd(iy0,jy0);
1078             dz00             = _mm_sub_pd(iz0,jz0);
1079             dx01             = _mm_sub_pd(ix0,jx1);
1080             dy01             = _mm_sub_pd(iy0,jy1);
1081             dz01             = _mm_sub_pd(iz0,jz1);
1082             dx02             = _mm_sub_pd(ix0,jx2);
1083             dy02             = _mm_sub_pd(iy0,jy2);
1084             dz02             = _mm_sub_pd(iz0,jz2);
1085             dx10             = _mm_sub_pd(ix1,jx0);
1086             dy10             = _mm_sub_pd(iy1,jy0);
1087             dz10             = _mm_sub_pd(iz1,jz0);
1088             dx11             = _mm_sub_pd(ix1,jx1);
1089             dy11             = _mm_sub_pd(iy1,jy1);
1090             dz11             = _mm_sub_pd(iz1,jz1);
1091             dx12             = _mm_sub_pd(ix1,jx2);
1092             dy12             = _mm_sub_pd(iy1,jy2);
1093             dz12             = _mm_sub_pd(iz1,jz2);
1094             dx20             = _mm_sub_pd(ix2,jx0);
1095             dy20             = _mm_sub_pd(iy2,jy0);
1096             dz20             = _mm_sub_pd(iz2,jz0);
1097             dx21             = _mm_sub_pd(ix2,jx1);
1098             dy21             = _mm_sub_pd(iy2,jy1);
1099             dz21             = _mm_sub_pd(iz2,jz1);
1100             dx22             = _mm_sub_pd(ix2,jx2);
1101             dy22             = _mm_sub_pd(iy2,jy2);
1102             dz22             = _mm_sub_pd(iz2,jz2);
1103
1104             /* Calculate squared distance and things based on it */
1105             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1106             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
1107             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
1108             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
1109             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1110             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1111             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
1112             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1113             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1114
1115             rinv00           = gmx_mm_invsqrt_pd(rsq00);
1116             rinv01           = gmx_mm_invsqrt_pd(rsq01);
1117             rinv02           = gmx_mm_invsqrt_pd(rsq02);
1118             rinv10           = gmx_mm_invsqrt_pd(rsq10);
1119             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1120             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1121             rinv20           = gmx_mm_invsqrt_pd(rsq20);
1122             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1123             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1124
1125             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
1126             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
1127             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
1128             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
1129             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1130             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1131             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
1132             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1133             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1134
1135             fjx0             = _mm_setzero_pd();
1136             fjy0             = _mm_setzero_pd();
1137             fjz0             = _mm_setzero_pd();
1138             fjx1             = _mm_setzero_pd();
1139             fjy1             = _mm_setzero_pd();
1140             fjz1             = _mm_setzero_pd();
1141             fjx2             = _mm_setzero_pd();
1142             fjy2             = _mm_setzero_pd();
1143             fjz2             = _mm_setzero_pd();
1144
1145             /**************************
1146              * CALCULATE INTERACTIONS *
1147              **************************/
1148
1149             /* COULOMB ELECTROSTATICS */
1150             velec            = _mm_mul_pd(qq00,rinv00);
1151             felec            = _mm_mul_pd(velec,rinvsq00);
1152
1153             /* LENNARD-JONES DISPERSION/REPULSION */
1154
1155             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1156             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1157
1158             fscal            = _mm_add_pd(felec,fvdw);
1159
1160             /* Calculate temporary vectorial force */
1161             tx               = _mm_mul_pd(fscal,dx00);
1162             ty               = _mm_mul_pd(fscal,dy00);
1163             tz               = _mm_mul_pd(fscal,dz00);
1164
1165             /* Update vectorial force */
1166             fix0             = _mm_add_pd(fix0,tx);
1167             fiy0             = _mm_add_pd(fiy0,ty);
1168             fiz0             = _mm_add_pd(fiz0,tz);
1169
1170             fjx0             = _mm_add_pd(fjx0,tx);
1171             fjy0             = _mm_add_pd(fjy0,ty);
1172             fjz0             = _mm_add_pd(fjz0,tz);
1173
1174             /**************************
1175              * CALCULATE INTERACTIONS *
1176              **************************/
1177
1178             /* COULOMB ELECTROSTATICS */
1179             velec            = _mm_mul_pd(qq01,rinv01);
1180             felec            = _mm_mul_pd(velec,rinvsq01);
1181
1182             fscal            = felec;
1183
1184             /* Calculate temporary vectorial force */
1185             tx               = _mm_mul_pd(fscal,dx01);
1186             ty               = _mm_mul_pd(fscal,dy01);
1187             tz               = _mm_mul_pd(fscal,dz01);
1188
1189             /* Update vectorial force */
1190             fix0             = _mm_add_pd(fix0,tx);
1191             fiy0             = _mm_add_pd(fiy0,ty);
1192             fiz0             = _mm_add_pd(fiz0,tz);
1193
1194             fjx1             = _mm_add_pd(fjx1,tx);
1195             fjy1             = _mm_add_pd(fjy1,ty);
1196             fjz1             = _mm_add_pd(fjz1,tz);
1197
1198             /**************************
1199              * CALCULATE INTERACTIONS *
1200              **************************/
1201
1202             /* COULOMB ELECTROSTATICS */
1203             velec            = _mm_mul_pd(qq02,rinv02);
1204             felec            = _mm_mul_pd(velec,rinvsq02);
1205
1206             fscal            = felec;
1207
1208             /* Calculate temporary vectorial force */
1209             tx               = _mm_mul_pd(fscal,dx02);
1210             ty               = _mm_mul_pd(fscal,dy02);
1211             tz               = _mm_mul_pd(fscal,dz02);
1212
1213             /* Update vectorial force */
1214             fix0             = _mm_add_pd(fix0,tx);
1215             fiy0             = _mm_add_pd(fiy0,ty);
1216             fiz0             = _mm_add_pd(fiz0,tz);
1217
1218             fjx2             = _mm_add_pd(fjx2,tx);
1219             fjy2             = _mm_add_pd(fjy2,ty);
1220             fjz2             = _mm_add_pd(fjz2,tz);
1221
1222             /**************************
1223              * CALCULATE INTERACTIONS *
1224              **************************/
1225
1226             /* COULOMB ELECTROSTATICS */
1227             velec            = _mm_mul_pd(qq10,rinv10);
1228             felec            = _mm_mul_pd(velec,rinvsq10);
1229
1230             fscal            = felec;
1231
1232             /* Calculate temporary vectorial force */
1233             tx               = _mm_mul_pd(fscal,dx10);
1234             ty               = _mm_mul_pd(fscal,dy10);
1235             tz               = _mm_mul_pd(fscal,dz10);
1236
1237             /* Update vectorial force */
1238             fix1             = _mm_add_pd(fix1,tx);
1239             fiy1             = _mm_add_pd(fiy1,ty);
1240             fiz1             = _mm_add_pd(fiz1,tz);
1241
1242             fjx0             = _mm_add_pd(fjx0,tx);
1243             fjy0             = _mm_add_pd(fjy0,ty);
1244             fjz0             = _mm_add_pd(fjz0,tz);
1245
1246             /**************************
1247              * CALCULATE INTERACTIONS *
1248              **************************/
1249
1250             /* COULOMB ELECTROSTATICS */
1251             velec            = _mm_mul_pd(qq11,rinv11);
1252             felec            = _mm_mul_pd(velec,rinvsq11);
1253
1254             fscal            = felec;
1255
1256             /* Calculate temporary vectorial force */
1257             tx               = _mm_mul_pd(fscal,dx11);
1258             ty               = _mm_mul_pd(fscal,dy11);
1259             tz               = _mm_mul_pd(fscal,dz11);
1260
1261             /* Update vectorial force */
1262             fix1             = _mm_add_pd(fix1,tx);
1263             fiy1             = _mm_add_pd(fiy1,ty);
1264             fiz1             = _mm_add_pd(fiz1,tz);
1265
1266             fjx1             = _mm_add_pd(fjx1,tx);
1267             fjy1             = _mm_add_pd(fjy1,ty);
1268             fjz1             = _mm_add_pd(fjz1,tz);
1269
1270             /**************************
1271              * CALCULATE INTERACTIONS *
1272              **************************/
1273
1274             /* COULOMB ELECTROSTATICS */
1275             velec            = _mm_mul_pd(qq12,rinv12);
1276             felec            = _mm_mul_pd(velec,rinvsq12);
1277
1278             fscal            = felec;
1279
1280             /* Calculate temporary vectorial force */
1281             tx               = _mm_mul_pd(fscal,dx12);
1282             ty               = _mm_mul_pd(fscal,dy12);
1283             tz               = _mm_mul_pd(fscal,dz12);
1284
1285             /* Update vectorial force */
1286             fix1             = _mm_add_pd(fix1,tx);
1287             fiy1             = _mm_add_pd(fiy1,ty);
1288             fiz1             = _mm_add_pd(fiz1,tz);
1289
1290             fjx2             = _mm_add_pd(fjx2,tx);
1291             fjy2             = _mm_add_pd(fjy2,ty);
1292             fjz2             = _mm_add_pd(fjz2,tz);
1293
1294             /**************************
1295              * CALCULATE INTERACTIONS *
1296              **************************/
1297
1298             /* COULOMB ELECTROSTATICS */
1299             velec            = _mm_mul_pd(qq20,rinv20);
1300             felec            = _mm_mul_pd(velec,rinvsq20);
1301
1302             fscal            = felec;
1303
1304             /* Calculate temporary vectorial force */
1305             tx               = _mm_mul_pd(fscal,dx20);
1306             ty               = _mm_mul_pd(fscal,dy20);
1307             tz               = _mm_mul_pd(fscal,dz20);
1308
1309             /* Update vectorial force */
1310             fix2             = _mm_add_pd(fix2,tx);
1311             fiy2             = _mm_add_pd(fiy2,ty);
1312             fiz2             = _mm_add_pd(fiz2,tz);
1313
1314             fjx0             = _mm_add_pd(fjx0,tx);
1315             fjy0             = _mm_add_pd(fjy0,ty);
1316             fjz0             = _mm_add_pd(fjz0,tz);
1317
1318             /**************************
1319              * CALCULATE INTERACTIONS *
1320              **************************/
1321
1322             /* COULOMB ELECTROSTATICS */
1323             velec            = _mm_mul_pd(qq21,rinv21);
1324             felec            = _mm_mul_pd(velec,rinvsq21);
1325
1326             fscal            = felec;
1327
1328             /* Calculate temporary vectorial force */
1329             tx               = _mm_mul_pd(fscal,dx21);
1330             ty               = _mm_mul_pd(fscal,dy21);
1331             tz               = _mm_mul_pd(fscal,dz21);
1332
1333             /* Update vectorial force */
1334             fix2             = _mm_add_pd(fix2,tx);
1335             fiy2             = _mm_add_pd(fiy2,ty);
1336             fiz2             = _mm_add_pd(fiz2,tz);
1337
1338             fjx1             = _mm_add_pd(fjx1,tx);
1339             fjy1             = _mm_add_pd(fjy1,ty);
1340             fjz1             = _mm_add_pd(fjz1,tz);
1341
1342             /**************************
1343              * CALCULATE INTERACTIONS *
1344              **************************/
1345
1346             /* COULOMB ELECTROSTATICS */
1347             velec            = _mm_mul_pd(qq22,rinv22);
1348             felec            = _mm_mul_pd(velec,rinvsq22);
1349
1350             fscal            = felec;
1351
1352             /* Calculate temporary vectorial force */
1353             tx               = _mm_mul_pd(fscal,dx22);
1354             ty               = _mm_mul_pd(fscal,dy22);
1355             tz               = _mm_mul_pd(fscal,dz22);
1356
1357             /* Update vectorial force */
1358             fix2             = _mm_add_pd(fix2,tx);
1359             fiy2             = _mm_add_pd(fiy2,ty);
1360             fiz2             = _mm_add_pd(fiz2,tz);
1361
1362             fjx2             = _mm_add_pd(fjx2,tx);
1363             fjy2             = _mm_add_pd(fjy2,ty);
1364             fjz2             = _mm_add_pd(fjz2,tz);
1365
1366             gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1367
1368             /* Inner loop uses 250 flops */
1369         }
1370
1371         if(jidx<j_index_end)
1372         {
1373
1374             jnrA             = jjnr[jidx];
1375             j_coord_offsetA  = DIM*jnrA;
1376
1377             /* load j atom coordinates */
1378             gmx_mm_load_3rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
1379                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1380
1381             /* Calculate displacement vector */
1382             dx00             = _mm_sub_pd(ix0,jx0);
1383             dy00             = _mm_sub_pd(iy0,jy0);
1384             dz00             = _mm_sub_pd(iz0,jz0);
1385             dx01             = _mm_sub_pd(ix0,jx1);
1386             dy01             = _mm_sub_pd(iy0,jy1);
1387             dz01             = _mm_sub_pd(iz0,jz1);
1388             dx02             = _mm_sub_pd(ix0,jx2);
1389             dy02             = _mm_sub_pd(iy0,jy2);
1390             dz02             = _mm_sub_pd(iz0,jz2);
1391             dx10             = _mm_sub_pd(ix1,jx0);
1392             dy10             = _mm_sub_pd(iy1,jy0);
1393             dz10             = _mm_sub_pd(iz1,jz0);
1394             dx11             = _mm_sub_pd(ix1,jx1);
1395             dy11             = _mm_sub_pd(iy1,jy1);
1396             dz11             = _mm_sub_pd(iz1,jz1);
1397             dx12             = _mm_sub_pd(ix1,jx2);
1398             dy12             = _mm_sub_pd(iy1,jy2);
1399             dz12             = _mm_sub_pd(iz1,jz2);
1400             dx20             = _mm_sub_pd(ix2,jx0);
1401             dy20             = _mm_sub_pd(iy2,jy0);
1402             dz20             = _mm_sub_pd(iz2,jz0);
1403             dx21             = _mm_sub_pd(ix2,jx1);
1404             dy21             = _mm_sub_pd(iy2,jy1);
1405             dz21             = _mm_sub_pd(iz2,jz1);
1406             dx22             = _mm_sub_pd(ix2,jx2);
1407             dy22             = _mm_sub_pd(iy2,jy2);
1408             dz22             = _mm_sub_pd(iz2,jz2);
1409
1410             /* Calculate squared distance and things based on it */
1411             rsq00            = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1412             rsq01            = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
1413             rsq02            = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
1414             rsq10            = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
1415             rsq11            = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1416             rsq12            = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1417             rsq20            = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
1418             rsq21            = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1419             rsq22            = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1420
1421             rinv00           = gmx_mm_invsqrt_pd(rsq00);
1422             rinv01           = gmx_mm_invsqrt_pd(rsq01);
1423             rinv02           = gmx_mm_invsqrt_pd(rsq02);
1424             rinv10           = gmx_mm_invsqrt_pd(rsq10);
1425             rinv11           = gmx_mm_invsqrt_pd(rsq11);
1426             rinv12           = gmx_mm_invsqrt_pd(rsq12);
1427             rinv20           = gmx_mm_invsqrt_pd(rsq20);
1428             rinv21           = gmx_mm_invsqrt_pd(rsq21);
1429             rinv22           = gmx_mm_invsqrt_pd(rsq22);
1430
1431             rinvsq00         = _mm_mul_pd(rinv00,rinv00);
1432             rinvsq01         = _mm_mul_pd(rinv01,rinv01);
1433             rinvsq02         = _mm_mul_pd(rinv02,rinv02);
1434             rinvsq10         = _mm_mul_pd(rinv10,rinv10);
1435             rinvsq11         = _mm_mul_pd(rinv11,rinv11);
1436             rinvsq12         = _mm_mul_pd(rinv12,rinv12);
1437             rinvsq20         = _mm_mul_pd(rinv20,rinv20);
1438             rinvsq21         = _mm_mul_pd(rinv21,rinv21);
1439             rinvsq22         = _mm_mul_pd(rinv22,rinv22);
1440
1441             fjx0             = _mm_setzero_pd();
1442             fjy0             = _mm_setzero_pd();
1443             fjz0             = _mm_setzero_pd();
1444             fjx1             = _mm_setzero_pd();
1445             fjy1             = _mm_setzero_pd();
1446             fjz1             = _mm_setzero_pd();
1447             fjx2             = _mm_setzero_pd();
1448             fjy2             = _mm_setzero_pd();
1449             fjz2             = _mm_setzero_pd();
1450
1451             /**************************
1452              * CALCULATE INTERACTIONS *
1453              **************************/
1454
1455             /* COULOMB ELECTROSTATICS */
1456             velec            = _mm_mul_pd(qq00,rinv00);
1457             felec            = _mm_mul_pd(velec,rinvsq00);
1458
1459             /* LENNARD-JONES DISPERSION/REPULSION */
1460
1461             rinvsix          = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1462             fvdw             = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1463
1464             fscal            = _mm_add_pd(felec,fvdw);
1465
1466             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1467
1468             /* Calculate temporary vectorial force */
1469             tx               = _mm_mul_pd(fscal,dx00);
1470             ty               = _mm_mul_pd(fscal,dy00);
1471             tz               = _mm_mul_pd(fscal,dz00);
1472
1473             /* Update vectorial force */
1474             fix0             = _mm_add_pd(fix0,tx);
1475             fiy0             = _mm_add_pd(fiy0,ty);
1476             fiz0             = _mm_add_pd(fiz0,tz);
1477
1478             fjx0             = _mm_add_pd(fjx0,tx);
1479             fjy0             = _mm_add_pd(fjy0,ty);
1480             fjz0             = _mm_add_pd(fjz0,tz);
1481
1482             /**************************
1483              * CALCULATE INTERACTIONS *
1484              **************************/
1485
1486             /* COULOMB ELECTROSTATICS */
1487             velec            = _mm_mul_pd(qq01,rinv01);
1488             felec            = _mm_mul_pd(velec,rinvsq01);
1489
1490             fscal            = felec;
1491
1492             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1493
1494             /* Calculate temporary vectorial force */
1495             tx               = _mm_mul_pd(fscal,dx01);
1496             ty               = _mm_mul_pd(fscal,dy01);
1497             tz               = _mm_mul_pd(fscal,dz01);
1498
1499             /* Update vectorial force */
1500             fix0             = _mm_add_pd(fix0,tx);
1501             fiy0             = _mm_add_pd(fiy0,ty);
1502             fiz0             = _mm_add_pd(fiz0,tz);
1503
1504             fjx1             = _mm_add_pd(fjx1,tx);
1505             fjy1             = _mm_add_pd(fjy1,ty);
1506             fjz1             = _mm_add_pd(fjz1,tz);
1507
1508             /**************************
1509              * CALCULATE INTERACTIONS *
1510              **************************/
1511
1512             /* COULOMB ELECTROSTATICS */
1513             velec            = _mm_mul_pd(qq02,rinv02);
1514             felec            = _mm_mul_pd(velec,rinvsq02);
1515
1516             fscal            = felec;
1517
1518             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1519
1520             /* Calculate temporary vectorial force */
1521             tx               = _mm_mul_pd(fscal,dx02);
1522             ty               = _mm_mul_pd(fscal,dy02);
1523             tz               = _mm_mul_pd(fscal,dz02);
1524
1525             /* Update vectorial force */
1526             fix0             = _mm_add_pd(fix0,tx);
1527             fiy0             = _mm_add_pd(fiy0,ty);
1528             fiz0             = _mm_add_pd(fiz0,tz);
1529
1530             fjx2             = _mm_add_pd(fjx2,tx);
1531             fjy2             = _mm_add_pd(fjy2,ty);
1532             fjz2             = _mm_add_pd(fjz2,tz);
1533
1534             /**************************
1535              * CALCULATE INTERACTIONS *
1536              **************************/
1537
1538             /* COULOMB ELECTROSTATICS */
1539             velec            = _mm_mul_pd(qq10,rinv10);
1540             felec            = _mm_mul_pd(velec,rinvsq10);
1541
1542             fscal            = felec;
1543
1544             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1545
1546             /* Calculate temporary vectorial force */
1547             tx               = _mm_mul_pd(fscal,dx10);
1548             ty               = _mm_mul_pd(fscal,dy10);
1549             tz               = _mm_mul_pd(fscal,dz10);
1550
1551             /* Update vectorial force */
1552             fix1             = _mm_add_pd(fix1,tx);
1553             fiy1             = _mm_add_pd(fiy1,ty);
1554             fiz1             = _mm_add_pd(fiz1,tz);
1555
1556             fjx0             = _mm_add_pd(fjx0,tx);
1557             fjy0             = _mm_add_pd(fjy0,ty);
1558             fjz0             = _mm_add_pd(fjz0,tz);
1559
1560             /**************************
1561              * CALCULATE INTERACTIONS *
1562              **************************/
1563
1564             /* COULOMB ELECTROSTATICS */
1565             velec            = _mm_mul_pd(qq11,rinv11);
1566             felec            = _mm_mul_pd(velec,rinvsq11);
1567
1568             fscal            = felec;
1569
1570             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1571
1572             /* Calculate temporary vectorial force */
1573             tx               = _mm_mul_pd(fscal,dx11);
1574             ty               = _mm_mul_pd(fscal,dy11);
1575             tz               = _mm_mul_pd(fscal,dz11);
1576
1577             /* Update vectorial force */
1578             fix1             = _mm_add_pd(fix1,tx);
1579             fiy1             = _mm_add_pd(fiy1,ty);
1580             fiz1             = _mm_add_pd(fiz1,tz);
1581
1582             fjx1             = _mm_add_pd(fjx1,tx);
1583             fjy1             = _mm_add_pd(fjy1,ty);
1584             fjz1             = _mm_add_pd(fjz1,tz);
1585
1586             /**************************
1587              * CALCULATE INTERACTIONS *
1588              **************************/
1589
1590             /* COULOMB ELECTROSTATICS */
1591             velec            = _mm_mul_pd(qq12,rinv12);
1592             felec            = _mm_mul_pd(velec,rinvsq12);
1593
1594             fscal            = felec;
1595
1596             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1597
1598             /* Calculate temporary vectorial force */
1599             tx               = _mm_mul_pd(fscal,dx12);
1600             ty               = _mm_mul_pd(fscal,dy12);
1601             tz               = _mm_mul_pd(fscal,dz12);
1602
1603             /* Update vectorial force */
1604             fix1             = _mm_add_pd(fix1,tx);
1605             fiy1             = _mm_add_pd(fiy1,ty);
1606             fiz1             = _mm_add_pd(fiz1,tz);
1607
1608             fjx2             = _mm_add_pd(fjx2,tx);
1609             fjy2             = _mm_add_pd(fjy2,ty);
1610             fjz2             = _mm_add_pd(fjz2,tz);
1611
1612             /**************************
1613              * CALCULATE INTERACTIONS *
1614              **************************/
1615
1616             /* COULOMB ELECTROSTATICS */
1617             velec            = _mm_mul_pd(qq20,rinv20);
1618             felec            = _mm_mul_pd(velec,rinvsq20);
1619
1620             fscal            = felec;
1621
1622             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1623
1624             /* Calculate temporary vectorial force */
1625             tx               = _mm_mul_pd(fscal,dx20);
1626             ty               = _mm_mul_pd(fscal,dy20);
1627             tz               = _mm_mul_pd(fscal,dz20);
1628
1629             /* Update vectorial force */
1630             fix2             = _mm_add_pd(fix2,tx);
1631             fiy2             = _mm_add_pd(fiy2,ty);
1632             fiz2             = _mm_add_pd(fiz2,tz);
1633
1634             fjx0             = _mm_add_pd(fjx0,tx);
1635             fjy0             = _mm_add_pd(fjy0,ty);
1636             fjz0             = _mm_add_pd(fjz0,tz);
1637
1638             /**************************
1639              * CALCULATE INTERACTIONS *
1640              **************************/
1641
1642             /* COULOMB ELECTROSTATICS */
1643             velec            = _mm_mul_pd(qq21,rinv21);
1644             felec            = _mm_mul_pd(velec,rinvsq21);
1645
1646             fscal            = felec;
1647
1648             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1649
1650             /* Calculate temporary vectorial force */
1651             tx               = _mm_mul_pd(fscal,dx21);
1652             ty               = _mm_mul_pd(fscal,dy21);
1653             tz               = _mm_mul_pd(fscal,dz21);
1654
1655             /* Update vectorial force */
1656             fix2             = _mm_add_pd(fix2,tx);
1657             fiy2             = _mm_add_pd(fiy2,ty);
1658             fiz2             = _mm_add_pd(fiz2,tz);
1659
1660             fjx1             = _mm_add_pd(fjx1,tx);
1661             fjy1             = _mm_add_pd(fjy1,ty);
1662             fjz1             = _mm_add_pd(fjz1,tz);
1663
1664             /**************************
1665              * CALCULATE INTERACTIONS *
1666              **************************/
1667
1668             /* COULOMB ELECTROSTATICS */
1669             velec            = _mm_mul_pd(qq22,rinv22);
1670             felec            = _mm_mul_pd(velec,rinvsq22);
1671
1672             fscal            = felec;
1673
1674             fscal            = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1675
1676             /* Calculate temporary vectorial force */
1677             tx               = _mm_mul_pd(fscal,dx22);
1678             ty               = _mm_mul_pd(fscal,dy22);
1679             tz               = _mm_mul_pd(fscal,dz22);
1680
1681             /* Update vectorial force */
1682             fix2             = _mm_add_pd(fix2,tx);
1683             fiy2             = _mm_add_pd(fiy2,ty);
1684             fiz2             = _mm_add_pd(fiz2,tz);
1685
1686             fjx2             = _mm_add_pd(fjx2,tx);
1687             fjy2             = _mm_add_pd(fjy2,ty);
1688             fjz2             = _mm_add_pd(fjz2,tz);
1689
1690             gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1691
1692             /* Inner loop uses 250 flops */
1693         }
1694
1695         /* End of innermost loop */
1696
1697         gmx_mm_update_iforce_3atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1698                                               f+i_coord_offset,fshift+i_shift_offset);
1699
1700         /* Increment number of inner iterations */
1701         inneriter                  += j_index_end - j_index_start;
1702
1703         /* Outer loop uses 18 flops */
1704     }
1705
1706     /* Increment number of outer iterations */
1707     outeriter        += nri;
1708
1709     /* Update outer/inner flops */
1710
1711     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*250);
1712 }