2 * Note: this file was generated by the Gromacs sse4_1_double kernel generator.
4 * This source code is part of
8 * Copyright (c) 2001-2012, The GROMACS Development Team
10 * Gromacs is a library for molecular simulation and trajectory analysis,
11 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
12 * a full list of developers and information, check out http://www.gromacs.org
14 * This program is free software; you can redistribute it and/or modify it under
15 * the terms of the GNU Lesser General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option) any
19 * To help fund GROMACS development, we humbly ask that you cite
20 * the papers people have written on it - you can find them on the website.
28 #include "../nb_kernel.h"
29 #include "types/simple.h"
33 #include "gmx_math_x86_sse4_1_double.h"
34 #include "kernelutil_x86_sse4_1_double.h"
37 * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double
38 * Electrostatics interaction: Coulomb
39 * VdW interaction: LennardJones
40 * Geometry: Water3-Water3
41 * Calculate force/pot: PotentialAndForce
/*
 * Water3-Water3 nonbonded kernel that computes both potential and force:
 * plain Coulomb electrostatics between all nine atom pairs of an i water and
 * a j water, plus Lennard-Jones between atom 0 of each water only.
 *
 * Inputs as used in the visible code:
 *   nlist       - neighbor list; jindex, shift and iinr[0] are read here.
 *   xx, ff      - coordinate and force arrays (the code works through the
 *                 local pointers x and f; their setup is not visible in this
 *                 listing - TODO confirm).
 *   fr          - supplies shift_vec, fshift, epsfac and ntype.
 *   mdatoms     - supplies chargeA and typeA.
 *   kernel_data - energygrp_elec and energygrp_vdw receive the summed
 *                 potentials at index ggid.
 *   nrnb        - flop accounting, updated once through inc_nrnb().
 *
 * NOTE(review): several names used below have no visible declaration and/or
 * assignment in this listing (e.g. nri, x, f, jnrA, jnrB, ggid, vdwparam,
 * outeriter, inneriter); confirm against the complete file.
 */
44 nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sse4_1_double
45 (t_nblist * gmx_restrict nlist,
46 rvec * gmx_restrict xx,
47 rvec * gmx_restrict ff,
48 t_forcerec * gmx_restrict fr,
49 t_mdatoms * gmx_restrict mdatoms,
50 nb_kernel_data_t * gmx_restrict kernel_data,
51 t_nrnb * gmx_restrict nrnb)
53 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
54 * just 0 for non-waters.
55 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
56 * jnr indices corresponding to data put in the two positions in the SIMD register.
58 int i_shift_offset,i_coord_offset,outeriter,inneriter;
59 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
61 int j_coord_offsetA,j_coord_offsetB;
62 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
64 real *shiftvec,*fshift,*x,*f;
65 __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
67 __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
69 __m128d ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
71 __m128d ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
72 int vdwjidx0A,vdwjidx0B;
73 __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
74 int vdwjidx1A,vdwjidx1B;
75 __m128d jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
76 int vdwjidx2A,vdwjidx2B;
77 __m128d jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
78 __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
79 __m128d dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
80 __m128d dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
81 __m128d dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
82 __m128d dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
83 __m128d dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
84 __m128d dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
85 __m128d dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
86 __m128d dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
87 __m128d velec,felec,velecsum,facel,crf,krf,krf2;
90 __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
93 __m128d one_sixth = _mm_set1_pd(1.0/6.0);
94 __m128d one_twelfth = _mm_set1_pd(1.0/12.0);
95 __m128d dummy_mask,cutoff_mask;
96 __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
97 __m128d one = _mm_set1_pd(1.0);
98 __m128d two = _mm_set1_pd(2.0);
104 jindex = nlist->jindex;
106 shiftidx = nlist->shift;
108 shiftvec = fr->shift_vec[0];
109 fshift = fr->fshift[0];
110 facel = _mm_set1_pd(fr->epsfac);
111 charge = mdatoms->chargeA;
112 nvdwtype = fr->ntype;
114 vdwtype = mdatoms->typeA;
116 /* Setup water-specific parameters */
/* Charges and the VdW type are read once, from the first i entry of the list, */
/* before the outer loop. NOTE(review): this presumably relies on every water  */
/* in the list sharing the same parameters - confirm.                          */
117 inr = nlist->iinr[0];
118 iq0 = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+0]));
119 iq1 = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
120 iq2 = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
121 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
/* j-side charges come from the same atoms (inr+0..2) as the i-side charges, */
/* but without the facel factor.                                             */
123 jq0 = _mm_set1_pd(charge[inr+0]);
124 jq1 = _mm_set1_pd(charge[inr+1]);
125 jq2 = _mm_set1_pd(charge[inr+2]);
126 vdwjidx0A = 2*vdwtype[inr+0];
/* Pairwise charge products (i charge times j charge) for all nine pairs; */
/* c6 and c12 are looked up only for the atom 0 - atom 0 pair.            */
127 qq00 = _mm_mul_pd(iq0,jq0);
128 c6_00 = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
129 c12_00 = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
130 qq01 = _mm_mul_pd(iq0,jq1);
131 qq02 = _mm_mul_pd(iq0,jq2);
132 qq10 = _mm_mul_pd(iq1,jq0);
133 qq11 = _mm_mul_pd(iq1,jq1);
134 qq12 = _mm_mul_pd(iq1,jq2);
135 qq20 = _mm_mul_pd(iq2,jq0);
136 qq21 = _mm_mul_pd(iq2,jq1);
137 qq22 = _mm_mul_pd(iq2,jq2);
139 /* Avoid stupid compiler warnings */
147 /* Start outer loop over neighborlists */
148 for(iidx=0; iidx<nri; iidx++)
150 /* Load shift vector for this list */
151 i_shift_offset = DIM*shiftidx[iidx];
153 /* Load limits for loop over neighbors */
154 j_index_start = jindex[iidx];
155 j_index_end = jindex[iidx+1];
157 /* Get outer coordinate index */
159 i_coord_offset = DIM*inr;
161 /* Load i particle coords and add shift vector */
162 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
163 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
/* Zero the i force accumulators for the three atoms of this i water */
165 fix0 = _mm_setzero_pd();
166 fiy0 = _mm_setzero_pd();
167 fiz0 = _mm_setzero_pd();
168 fix1 = _mm_setzero_pd();
169 fiy1 = _mm_setzero_pd();
170 fiz1 = _mm_setzero_pd();
171 fix2 = _mm_setzero_pd();
172 fiy2 = _mm_setzero_pd();
173 fiz2 = _mm_setzero_pd();
175 /* Reset potential sums */
176 velecsum = _mm_setzero_pd();
177 vvdwsum = _mm_setzero_pd();
179 /* Start inner kernel loop */
/* Two j waters are handled per iteration (lanes A and B); the bound */
/* j_index_end-1 stops before a possible unpaired last entry.        */
180 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
183 /* Get j neighbor index, and coordinate index */
186 j_coord_offsetA = DIM*jnrA;
187 j_coord_offsetB = DIM*jnrB;
189 /* load j atom coordinates */
190 gmx_mm_load_3rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
191 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
193 /* Calculate displacement vector */
194 dx00 = _mm_sub_pd(ix0,jx0);
195 dy00 = _mm_sub_pd(iy0,jy0);
196 dz00 = _mm_sub_pd(iz0,jz0);
197 dx01 = _mm_sub_pd(ix0,jx1);
198 dy01 = _mm_sub_pd(iy0,jy1);
199 dz01 = _mm_sub_pd(iz0,jz1);
200 dx02 = _mm_sub_pd(ix0,jx2);
201 dy02 = _mm_sub_pd(iy0,jy2);
202 dz02 = _mm_sub_pd(iz0,jz2);
203 dx10 = _mm_sub_pd(ix1,jx0);
204 dy10 = _mm_sub_pd(iy1,jy0);
205 dz10 = _mm_sub_pd(iz1,jz0);
206 dx11 = _mm_sub_pd(ix1,jx1);
207 dy11 = _mm_sub_pd(iy1,jy1);
208 dz11 = _mm_sub_pd(iz1,jz1);
209 dx12 = _mm_sub_pd(ix1,jx2);
210 dy12 = _mm_sub_pd(iy1,jy2);
211 dz12 = _mm_sub_pd(iz1,jz2);
212 dx20 = _mm_sub_pd(ix2,jx0);
213 dy20 = _mm_sub_pd(iy2,jy0);
214 dz20 = _mm_sub_pd(iz2,jz0);
215 dx21 = _mm_sub_pd(ix2,jx1);
216 dy21 = _mm_sub_pd(iy2,jy1);
217 dz21 = _mm_sub_pd(iz2,jz1);
218 dx22 = _mm_sub_pd(ix2,jx2);
219 dy22 = _mm_sub_pd(iy2,jy2);
220 dz22 = _mm_sub_pd(iz2,jz2);
222 /* Calculate squared distance and things based on it */
223 rsq00 = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
224 rsq01 = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
225 rsq02 = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
226 rsq10 = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
227 rsq11 = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
228 rsq12 = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
229 rsq20 = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
230 rsq21 = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
231 rsq22 = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
/* Presumably rinv = 1/sqrt(rsq), going by the helper name; rinvsq below is its square */
233 rinv00 = gmx_mm_invsqrt_pd(rsq00);
234 rinv01 = gmx_mm_invsqrt_pd(rsq01);
235 rinv02 = gmx_mm_invsqrt_pd(rsq02);
236 rinv10 = gmx_mm_invsqrt_pd(rsq10);
237 rinv11 = gmx_mm_invsqrt_pd(rsq11);
238 rinv12 = gmx_mm_invsqrt_pd(rsq12);
239 rinv20 = gmx_mm_invsqrt_pd(rsq20);
240 rinv21 = gmx_mm_invsqrt_pd(rsq21);
241 rinv22 = gmx_mm_invsqrt_pd(rsq22);
243 rinvsq00 = _mm_mul_pd(rinv00,rinv00);
244 rinvsq01 = _mm_mul_pd(rinv01,rinv01);
245 rinvsq02 = _mm_mul_pd(rinv02,rinv02);
246 rinvsq10 = _mm_mul_pd(rinv10,rinv10);
247 rinvsq11 = _mm_mul_pd(rinv11,rinv11);
248 rinvsq12 = _mm_mul_pd(rinv12,rinv12);
249 rinvsq20 = _mm_mul_pd(rinv20,rinv20);
250 rinvsq21 = _mm_mul_pd(rinv21,rinv21);
251 rinvsq22 = _mm_mul_pd(rinv22,rinv22);
/* Per-iteration j force accumulators; they are passed to the j force */
/* helper in one call after all nine pairs have been processed.       */
253 fjx0 = _mm_setzero_pd();
254 fjy0 = _mm_setzero_pd();
255 fjz0 = _mm_setzero_pd();
256 fjx1 = _mm_setzero_pd();
257 fjy1 = _mm_setzero_pd();
258 fjz1 = _mm_setzero_pd();
259 fjx2 = _mm_setzero_pd();
260 fjy2 = _mm_setzero_pd();
261 fjz2 = _mm_setzero_pd();
263 /**************************
264 * CALCULATE INTERACTIONS *
265 **************************/
267 /* COULOMB ELECTROSTATICS */
/* velec = qq*rinv; felec = velec*rinvsq, which is later multiplied by the */
/* displacement components to give the force vector.                       */
268 velec = _mm_mul_pd(qq00,rinv00);
269 felec = _mm_mul_pd(velec,rinvsq00);
271 /* LENNARD-JONES DISPERSION/REPULSION */
/* vvdw6 = c6*rinv^6 and vvdw12 = c12*rinv^12. The potential scales them by  */
/* 1/6 and 1/12, which suggests c6 and c12 are stored pre-multiplied by 6    */
/* and 12 - TODO confirm against the parameter setup.                        */
273 rinvsix = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
274 vvdw6 = _mm_mul_pd(c6_00,rinvsix);
275 vvdw12 = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
276 vvdw = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
277 fvdw = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
279 /* Update potential sum for this i atom from the interaction with this j atom. */
280 velecsum = _mm_add_pd(velecsum,velec);
281 vvdwsum = _mm_add_pd(vvdwsum,vvdw);
283 fscal = _mm_add_pd(felec,fvdw);
285 /* Calculate temporary vectorial force */
286 tx = _mm_mul_pd(fscal,dx00);
287 ty = _mm_mul_pd(fscal,dy00);
288 tz = _mm_mul_pd(fscal,dz00);
290 /* Update vectorial force */
291 fix0 = _mm_add_pd(fix0,tx);
292 fiy0 = _mm_add_pd(fiy0,ty);
293 fiz0 = _mm_add_pd(fiz0,tz);
295 fjx0 = _mm_add_pd(fjx0,tx);
296 fjy0 = _mm_add_pd(fjy0,ty);
297 fjz0 = _mm_add_pd(fjz0,tz);
299 /**************************
300 * CALCULATE INTERACTIONS *
301 **************************/
303 /* COULOMB ELECTROSTATICS */
304 velec = _mm_mul_pd(qq01,rinv01);
305 felec = _mm_mul_pd(velec,rinvsq01);
307 /* Update potential sum for this i atom from the interaction with this j atom. */
308 velecsum = _mm_add_pd(velecsum,velec);
/* NOTE(review): no assignment to fscal is visible for this pair; as listed, */
/* the 0-0 value (felec+fvdw) would be reused below and felec would go       */
/* unused. Confirm that "fscal = felec;" belongs here (same for the          */
/* remaining Coulomb-only pairs in this loop).                               */
312 /* Calculate temporary vectorial force */
313 tx = _mm_mul_pd(fscal,dx01);
314 ty = _mm_mul_pd(fscal,dy01);
315 tz = _mm_mul_pd(fscal,dz01);
317 /* Update vectorial force */
318 fix0 = _mm_add_pd(fix0,tx);
319 fiy0 = _mm_add_pd(fiy0,ty);
320 fiz0 = _mm_add_pd(fiz0,tz);
322 fjx1 = _mm_add_pd(fjx1,tx);
323 fjy1 = _mm_add_pd(fjy1,ty);
324 fjz1 = _mm_add_pd(fjz1,tz);
326 /**************************
327 * CALCULATE INTERACTIONS *
328 **************************/
330 /* COULOMB ELECTROSTATICS */
331 velec = _mm_mul_pd(qq02,rinv02);
332 felec = _mm_mul_pd(velec,rinvsq02);
334 /* Update potential sum for this i atom from the interaction with this j atom. */
335 velecsum = _mm_add_pd(velecsum,velec);
339 /* Calculate temporary vectorial force */
340 tx = _mm_mul_pd(fscal,dx02);
341 ty = _mm_mul_pd(fscal,dy02);
342 tz = _mm_mul_pd(fscal,dz02);
344 /* Update vectorial force */
345 fix0 = _mm_add_pd(fix0,tx);
346 fiy0 = _mm_add_pd(fiy0,ty);
347 fiz0 = _mm_add_pd(fiz0,tz);
349 fjx2 = _mm_add_pd(fjx2,tx);
350 fjy2 = _mm_add_pd(fjy2,ty);
351 fjz2 = _mm_add_pd(fjz2,tz);
353 /**************************
354 * CALCULATE INTERACTIONS *
355 **************************/
357 /* COULOMB ELECTROSTATICS */
358 velec = _mm_mul_pd(qq10,rinv10);
359 felec = _mm_mul_pd(velec,rinvsq10);
361 /* Update potential sum for this i atom from the interaction with this j atom. */
362 velecsum = _mm_add_pd(velecsum,velec);
366 /* Calculate temporary vectorial force */
367 tx = _mm_mul_pd(fscal,dx10);
368 ty = _mm_mul_pd(fscal,dy10);
369 tz = _mm_mul_pd(fscal,dz10);
371 /* Update vectorial force */
372 fix1 = _mm_add_pd(fix1,tx);
373 fiy1 = _mm_add_pd(fiy1,ty);
374 fiz1 = _mm_add_pd(fiz1,tz);
376 fjx0 = _mm_add_pd(fjx0,tx);
377 fjy0 = _mm_add_pd(fjy0,ty);
378 fjz0 = _mm_add_pd(fjz0,tz);
380 /**************************
381 * CALCULATE INTERACTIONS *
382 **************************/
384 /* COULOMB ELECTROSTATICS */
385 velec = _mm_mul_pd(qq11,rinv11);
386 felec = _mm_mul_pd(velec,rinvsq11);
388 /* Update potential sum for this i atom from the interaction with this j atom. */
389 velecsum = _mm_add_pd(velecsum,velec);
393 /* Calculate temporary vectorial force */
394 tx = _mm_mul_pd(fscal,dx11);
395 ty = _mm_mul_pd(fscal,dy11);
396 tz = _mm_mul_pd(fscal,dz11);
398 /* Update vectorial force */
399 fix1 = _mm_add_pd(fix1,tx);
400 fiy1 = _mm_add_pd(fiy1,ty);
401 fiz1 = _mm_add_pd(fiz1,tz);
403 fjx1 = _mm_add_pd(fjx1,tx);
404 fjy1 = _mm_add_pd(fjy1,ty);
405 fjz1 = _mm_add_pd(fjz1,tz);
407 /**************************
408 * CALCULATE INTERACTIONS *
409 **************************/
411 /* COULOMB ELECTROSTATICS */
412 velec = _mm_mul_pd(qq12,rinv12);
413 felec = _mm_mul_pd(velec,rinvsq12);
415 /* Update potential sum for this i atom from the interaction with this j atom. */
416 velecsum = _mm_add_pd(velecsum,velec);
420 /* Calculate temporary vectorial force */
421 tx = _mm_mul_pd(fscal,dx12);
422 ty = _mm_mul_pd(fscal,dy12);
423 tz = _mm_mul_pd(fscal,dz12);
425 /* Update vectorial force */
426 fix1 = _mm_add_pd(fix1,tx);
427 fiy1 = _mm_add_pd(fiy1,ty);
428 fiz1 = _mm_add_pd(fiz1,tz);
430 fjx2 = _mm_add_pd(fjx2,tx);
431 fjy2 = _mm_add_pd(fjy2,ty);
432 fjz2 = _mm_add_pd(fjz2,tz);
434 /**************************
435 * CALCULATE INTERACTIONS *
436 **************************/
438 /* COULOMB ELECTROSTATICS */
439 velec = _mm_mul_pd(qq20,rinv20);
440 felec = _mm_mul_pd(velec,rinvsq20);
442 /* Update potential sum for this i atom from the interaction with this j atom. */
443 velecsum = _mm_add_pd(velecsum,velec);
447 /* Calculate temporary vectorial force */
448 tx = _mm_mul_pd(fscal,dx20);
449 ty = _mm_mul_pd(fscal,dy20);
450 tz = _mm_mul_pd(fscal,dz20);
452 /* Update vectorial force */
453 fix2 = _mm_add_pd(fix2,tx);
454 fiy2 = _mm_add_pd(fiy2,ty);
455 fiz2 = _mm_add_pd(fiz2,tz);
457 fjx0 = _mm_add_pd(fjx0,tx);
458 fjy0 = _mm_add_pd(fjy0,ty);
459 fjz0 = _mm_add_pd(fjz0,tz);
461 /**************************
462 * CALCULATE INTERACTIONS *
463 **************************/
465 /* COULOMB ELECTROSTATICS */
466 velec = _mm_mul_pd(qq21,rinv21);
467 felec = _mm_mul_pd(velec,rinvsq21);
469 /* Update potential sum for this i atom from the interaction with this j atom. */
470 velecsum = _mm_add_pd(velecsum,velec);
474 /* Calculate temporary vectorial force */
475 tx = _mm_mul_pd(fscal,dx21);
476 ty = _mm_mul_pd(fscal,dy21);
477 tz = _mm_mul_pd(fscal,dz21);
479 /* Update vectorial force */
480 fix2 = _mm_add_pd(fix2,tx);
481 fiy2 = _mm_add_pd(fiy2,ty);
482 fiz2 = _mm_add_pd(fiz2,tz);
484 fjx1 = _mm_add_pd(fjx1,tx);
485 fjy1 = _mm_add_pd(fjy1,ty);
486 fjz1 = _mm_add_pd(fjz1,tz);
488 /**************************
489 * CALCULATE INTERACTIONS *
490 **************************/
492 /* COULOMB ELECTROSTATICS */
493 velec = _mm_mul_pd(qq22,rinv22);
494 felec = _mm_mul_pd(velec,rinvsq22);
496 /* Update potential sum for this i atom from the interaction with this j atom. */
497 velecsum = _mm_add_pd(velecsum,velec);
501 /* Calculate temporary vectorial force */
502 tx = _mm_mul_pd(fscal,dx22);
503 ty = _mm_mul_pd(fscal,dy22);
504 tz = _mm_mul_pd(fscal,dz22);
506 /* Update vectorial force */
507 fix2 = _mm_add_pd(fix2,tx);
508 fiy2 = _mm_add_pd(fiy2,ty);
509 fiz2 = _mm_add_pd(fiz2,tz);
511 fjx2 = _mm_add_pd(fjx2,tx);
512 fjy2 = _mm_add_pd(fjy2,ty);
513 fjz2 = _mm_add_pd(fjz2,tz);
/* Hand the accumulated j forces for both waters to the helper; going by its */
/* name it subtracts them from f at the two j offsets - confirm.             */
515 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
517 /* Inner loop uses 264 flops */
/* Single-j path: the same nine-pair computation for one j water, using only */
/* offset A and the 1ptr load and decrement helpers.                         */
/* NOTE(review): the condition selecting this path and the assignment of     */
/* jnrA are not visible in this listing - confirm against the complete file. */
524 j_coord_offsetA = DIM*jnrA;
526 /* load j atom coordinates */
527 gmx_mm_load_3rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
528 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
530 /* Calculate displacement vector */
531 dx00 = _mm_sub_pd(ix0,jx0);
532 dy00 = _mm_sub_pd(iy0,jy0);
533 dz00 = _mm_sub_pd(iz0,jz0);
534 dx01 = _mm_sub_pd(ix0,jx1);
535 dy01 = _mm_sub_pd(iy0,jy1);
536 dz01 = _mm_sub_pd(iz0,jz1);
537 dx02 = _mm_sub_pd(ix0,jx2);
538 dy02 = _mm_sub_pd(iy0,jy2);
539 dz02 = _mm_sub_pd(iz0,jz2);
540 dx10 = _mm_sub_pd(ix1,jx0);
541 dy10 = _mm_sub_pd(iy1,jy0);
542 dz10 = _mm_sub_pd(iz1,jz0);
543 dx11 = _mm_sub_pd(ix1,jx1);
544 dy11 = _mm_sub_pd(iy1,jy1);
545 dz11 = _mm_sub_pd(iz1,jz1);
546 dx12 = _mm_sub_pd(ix1,jx2);
547 dy12 = _mm_sub_pd(iy1,jy2);
548 dz12 = _mm_sub_pd(iz1,jz2);
549 dx20 = _mm_sub_pd(ix2,jx0);
550 dy20 = _mm_sub_pd(iy2,jy0);
551 dz20 = _mm_sub_pd(iz2,jz0);
552 dx21 = _mm_sub_pd(ix2,jx1);
553 dy21 = _mm_sub_pd(iy2,jy1);
554 dz21 = _mm_sub_pd(iz2,jz1);
555 dx22 = _mm_sub_pd(ix2,jx2);
556 dy22 = _mm_sub_pd(iy2,jy2);
557 dz22 = _mm_sub_pd(iz2,jz2);
559 /* Calculate squared distance and things based on it */
560 rsq00 = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
561 rsq01 = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
562 rsq02 = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
563 rsq10 = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
564 rsq11 = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
565 rsq12 = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
566 rsq20 = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
567 rsq21 = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
568 rsq22 = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
570 rinv00 = gmx_mm_invsqrt_pd(rsq00);
571 rinv01 = gmx_mm_invsqrt_pd(rsq01);
572 rinv02 = gmx_mm_invsqrt_pd(rsq02);
573 rinv10 = gmx_mm_invsqrt_pd(rsq10);
574 rinv11 = gmx_mm_invsqrt_pd(rsq11);
575 rinv12 = gmx_mm_invsqrt_pd(rsq12);
576 rinv20 = gmx_mm_invsqrt_pd(rsq20);
577 rinv21 = gmx_mm_invsqrt_pd(rsq21);
578 rinv22 = gmx_mm_invsqrt_pd(rsq22);
580 rinvsq00 = _mm_mul_pd(rinv00,rinv00);
581 rinvsq01 = _mm_mul_pd(rinv01,rinv01);
582 rinvsq02 = _mm_mul_pd(rinv02,rinv02);
583 rinvsq10 = _mm_mul_pd(rinv10,rinv10);
584 rinvsq11 = _mm_mul_pd(rinv11,rinv11);
585 rinvsq12 = _mm_mul_pd(rinv12,rinv12);
586 rinvsq20 = _mm_mul_pd(rinv20,rinv20);
587 rinvsq21 = _mm_mul_pd(rinv21,rinv21);
588 rinvsq22 = _mm_mul_pd(rinv22,rinv22);
590 fjx0 = _mm_setzero_pd();
591 fjy0 = _mm_setzero_pd();
592 fjz0 = _mm_setzero_pd();
593 fjx1 = _mm_setzero_pd();
594 fjy1 = _mm_setzero_pd();
595 fjz1 = _mm_setzero_pd();
596 fjx2 = _mm_setzero_pd();
597 fjy2 = _mm_setzero_pd();
598 fjz2 = _mm_setzero_pd();
600 /**************************
601 * CALCULATE INTERACTIONS *
602 **************************/
604 /* COULOMB ELECTROSTATICS */
605 velec = _mm_mul_pd(qq00,rinv00);
606 felec = _mm_mul_pd(velec,rinvsq00);
608 /* LENNARD-JONES DISPERSION/REPULSION */
610 rinvsix = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
611 vvdw6 = _mm_mul_pd(c6_00,rinvsix);
612 vvdw12 = _mm_mul_pd(c12_00,_mm_mul_pd(rinvsix,rinvsix));
613 vvdw = _mm_sub_pd( _mm_mul_pd(vvdw12,one_twelfth) , _mm_mul_pd(vvdw6,one_sixth) );
614 fvdw = _mm_mul_pd(_mm_sub_pd(vvdw12,vvdw6),rinvsq00);
616 /* Update potential sum for this i atom from the interaction with this j atom. */
/* _mm_unpacklo_pd(v,zero) keeps the low element of v and sets the high   */
/* element to zero, so the unused second lane adds nothing to the sums or */
/* to the forces computed from fscal.                                     */
617 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
618 velecsum = _mm_add_pd(velecsum,velec);
619 vvdw = _mm_unpacklo_pd(vvdw,_mm_setzero_pd());
620 vvdwsum = _mm_add_pd(vvdwsum,vvdw);
622 fscal = _mm_add_pd(felec,fvdw);
624 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
626 /* Calculate temporary vectorial force */
627 tx = _mm_mul_pd(fscal,dx00);
628 ty = _mm_mul_pd(fscal,dy00);
629 tz = _mm_mul_pd(fscal,dz00);
631 /* Update vectorial force */
632 fix0 = _mm_add_pd(fix0,tx);
633 fiy0 = _mm_add_pd(fiy0,ty);
634 fiz0 = _mm_add_pd(fiz0,tz);
636 fjx0 = _mm_add_pd(fjx0,tx);
637 fjy0 = _mm_add_pd(fjy0,ty);
638 fjz0 = _mm_add_pd(fjz0,tz);
640 /**************************
641 * CALCULATE INTERACTIONS *
642 **************************/
644 /* COULOMB ELECTROSTATICS */
645 velec = _mm_mul_pd(qq01,rinv01);
646 felec = _mm_mul_pd(velec,rinvsq01);
648 /* Update potential sum for this i atom from the interaction with this j atom. */
649 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
650 velecsum = _mm_add_pd(velecsum,velec);
/* NOTE(review): fscal is masked below without a visible "fscal = felec;" */
/* for this pair (same for the remaining Coulomb-only pairs) - confirm.   */
654 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
656 /* Calculate temporary vectorial force */
657 tx = _mm_mul_pd(fscal,dx01);
658 ty = _mm_mul_pd(fscal,dy01);
659 tz = _mm_mul_pd(fscal,dz01);
661 /* Update vectorial force */
662 fix0 = _mm_add_pd(fix0,tx);
663 fiy0 = _mm_add_pd(fiy0,ty);
664 fiz0 = _mm_add_pd(fiz0,tz);
666 fjx1 = _mm_add_pd(fjx1,tx);
667 fjy1 = _mm_add_pd(fjy1,ty);
668 fjz1 = _mm_add_pd(fjz1,tz);
670 /**************************
671 * CALCULATE INTERACTIONS *
672 **************************/
674 /* COULOMB ELECTROSTATICS */
675 velec = _mm_mul_pd(qq02,rinv02);
676 felec = _mm_mul_pd(velec,rinvsq02);
678 /* Update potential sum for this i atom from the interaction with this j atom. */
679 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
680 velecsum = _mm_add_pd(velecsum,velec);
684 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
686 /* Calculate temporary vectorial force */
687 tx = _mm_mul_pd(fscal,dx02);
688 ty = _mm_mul_pd(fscal,dy02);
689 tz = _mm_mul_pd(fscal,dz02);
691 /* Update vectorial force */
692 fix0 = _mm_add_pd(fix0,tx);
693 fiy0 = _mm_add_pd(fiy0,ty);
694 fiz0 = _mm_add_pd(fiz0,tz);
696 fjx2 = _mm_add_pd(fjx2,tx);
697 fjy2 = _mm_add_pd(fjy2,ty);
698 fjz2 = _mm_add_pd(fjz2,tz);
700 /**************************
701 * CALCULATE INTERACTIONS *
702 **************************/
704 /* COULOMB ELECTROSTATICS */
705 velec = _mm_mul_pd(qq10,rinv10);
706 felec = _mm_mul_pd(velec,rinvsq10);
708 /* Update potential sum for this i atom from the interaction with this j atom. */
709 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
710 velecsum = _mm_add_pd(velecsum,velec);
714 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
716 /* Calculate temporary vectorial force */
717 tx = _mm_mul_pd(fscal,dx10);
718 ty = _mm_mul_pd(fscal,dy10);
719 tz = _mm_mul_pd(fscal,dz10);
721 /* Update vectorial force */
722 fix1 = _mm_add_pd(fix1,tx);
723 fiy1 = _mm_add_pd(fiy1,ty);
724 fiz1 = _mm_add_pd(fiz1,tz);
726 fjx0 = _mm_add_pd(fjx0,tx);
727 fjy0 = _mm_add_pd(fjy0,ty);
728 fjz0 = _mm_add_pd(fjz0,tz);
730 /**************************
731 * CALCULATE INTERACTIONS *
732 **************************/
734 /* COULOMB ELECTROSTATICS */
735 velec = _mm_mul_pd(qq11,rinv11);
736 felec = _mm_mul_pd(velec,rinvsq11);
738 /* Update potential sum for this i atom from the interaction with this j atom. */
739 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
740 velecsum = _mm_add_pd(velecsum,velec);
744 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
746 /* Calculate temporary vectorial force */
747 tx = _mm_mul_pd(fscal,dx11);
748 ty = _mm_mul_pd(fscal,dy11);
749 tz = _mm_mul_pd(fscal,dz11);
751 /* Update vectorial force */
752 fix1 = _mm_add_pd(fix1,tx);
753 fiy1 = _mm_add_pd(fiy1,ty);
754 fiz1 = _mm_add_pd(fiz1,tz);
756 fjx1 = _mm_add_pd(fjx1,tx);
757 fjy1 = _mm_add_pd(fjy1,ty);
758 fjz1 = _mm_add_pd(fjz1,tz);
760 /**************************
761 * CALCULATE INTERACTIONS *
762 **************************/
764 /* COULOMB ELECTROSTATICS */
765 velec = _mm_mul_pd(qq12,rinv12);
766 felec = _mm_mul_pd(velec,rinvsq12);
768 /* Update potential sum for this i atom from the interaction with this j atom. */
769 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
770 velecsum = _mm_add_pd(velecsum,velec);
774 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
776 /* Calculate temporary vectorial force */
777 tx = _mm_mul_pd(fscal,dx12);
778 ty = _mm_mul_pd(fscal,dy12);
779 tz = _mm_mul_pd(fscal,dz12);
781 /* Update vectorial force */
782 fix1 = _mm_add_pd(fix1,tx);
783 fiy1 = _mm_add_pd(fiy1,ty);
784 fiz1 = _mm_add_pd(fiz1,tz);
786 fjx2 = _mm_add_pd(fjx2,tx);
787 fjy2 = _mm_add_pd(fjy2,ty);
788 fjz2 = _mm_add_pd(fjz2,tz);
790 /**************************
791 * CALCULATE INTERACTIONS *
792 **************************/
794 /* COULOMB ELECTROSTATICS */
795 velec = _mm_mul_pd(qq20,rinv20);
796 felec = _mm_mul_pd(velec,rinvsq20);
798 /* Update potential sum for this i atom from the interaction with this j atom. */
799 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
800 velecsum = _mm_add_pd(velecsum,velec);
804 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
806 /* Calculate temporary vectorial force */
807 tx = _mm_mul_pd(fscal,dx20);
808 ty = _mm_mul_pd(fscal,dy20);
809 tz = _mm_mul_pd(fscal,dz20);
811 /* Update vectorial force */
812 fix2 = _mm_add_pd(fix2,tx);
813 fiy2 = _mm_add_pd(fiy2,ty);
814 fiz2 = _mm_add_pd(fiz2,tz);
816 fjx0 = _mm_add_pd(fjx0,tx);
817 fjy0 = _mm_add_pd(fjy0,ty);
818 fjz0 = _mm_add_pd(fjz0,tz);
820 /**************************
821 * CALCULATE INTERACTIONS *
822 **************************/
824 /* COULOMB ELECTROSTATICS */
825 velec = _mm_mul_pd(qq21,rinv21);
826 felec = _mm_mul_pd(velec,rinvsq21);
828 /* Update potential sum for this i atom from the interaction with this j atom. */
829 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
830 velecsum = _mm_add_pd(velecsum,velec);
834 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
836 /* Calculate temporary vectorial force */
837 tx = _mm_mul_pd(fscal,dx21);
838 ty = _mm_mul_pd(fscal,dy21);
839 tz = _mm_mul_pd(fscal,dz21);
841 /* Update vectorial force */
842 fix2 = _mm_add_pd(fix2,tx);
843 fiy2 = _mm_add_pd(fiy2,ty);
844 fiz2 = _mm_add_pd(fiz2,tz);
846 fjx1 = _mm_add_pd(fjx1,tx);
847 fjy1 = _mm_add_pd(fjy1,ty);
848 fjz1 = _mm_add_pd(fjz1,tz);
850 /**************************
851 * CALCULATE INTERACTIONS *
852 **************************/
854 /* COULOMB ELECTROSTATICS */
855 velec = _mm_mul_pd(qq22,rinv22);
856 felec = _mm_mul_pd(velec,rinvsq22);
858 /* Update potential sum for this i atom from the interaction with this j atom. */
859 velec = _mm_unpacklo_pd(velec,_mm_setzero_pd());
860 velecsum = _mm_add_pd(velecsum,velec);
864 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
866 /* Calculate temporary vectorial force */
867 tx = _mm_mul_pd(fscal,dx22);
868 ty = _mm_mul_pd(fscal,dy22);
869 tz = _mm_mul_pd(fscal,dz22);
871 /* Update vectorial force */
872 fix2 = _mm_add_pd(fix2,tx);
873 fiy2 = _mm_add_pd(fiy2,ty);
874 fiz2 = _mm_add_pd(fiz2,tz);
876 fjx2 = _mm_add_pd(fjx2,tx);
877 fjy2 = _mm_add_pd(fjy2,ty);
878 fjz2 = _mm_add_pd(fjz2,tz);
880 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
882 /* Inner loop uses 264 flops */
885 /* End of innermost loop */
/* Hand the i force accumulators to the helper together with the i force */
/* destination and the shift-force destination for this shift index.     */
887 gmx_mm_update_iforce_3atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
888 f+i_coord_offset,fshift+i_shift_offset);
891 /* Update potential energies */
892 gmx_mm_update_1pot_pd(velecsum,kernel_data->energygrp_elec+ggid);
893 gmx_mm_update_1pot_pd(vvdwsum,kernel_data->energygrp_vdw+ggid);
895 /* Increment number of inner iterations */
896 inneriter += j_index_end - j_index_start;
898 /* Outer loop uses 20 flops */
901 /* Increment number of outer iterations */
904 /* Update outer/inner flops */
/* 20 flops per outer iteration and 264 per j entry, matching the per-loop */
/* counts noted in the comments above.                                     */
906 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*264);
909 * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double
910 * Electrostatics interaction: Coulomb
911 * VdW interaction: LennardJones
912 * Geometry: Water3-Water3
913 * Calculate force/pot: Force
916 nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sse4_1_double
917 (t_nblist * gmx_restrict nlist,
918 rvec * gmx_restrict xx,
919 rvec * gmx_restrict ff,
920 t_forcerec * gmx_restrict fr,
921 t_mdatoms * gmx_restrict mdatoms,
922 nb_kernel_data_t * gmx_restrict kernel_data,
923 t_nrnb * gmx_restrict nrnb)
925 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
926 * just 0 for non-waters.
927 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
928 * jnr indices corresponding to data put in the two positions in the SIMD register.
930 int i_shift_offset,i_coord_offset,outeriter,inneriter;
931 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
933 int j_coord_offsetA,j_coord_offsetB;
934 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
936 real *shiftvec,*fshift,*x,*f;
937 __m128d tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
939 __m128d ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
941 __m128d ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
943 __m128d ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
944 int vdwjidx0A,vdwjidx0B;
945 __m128d jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
946 int vdwjidx1A,vdwjidx1B;
947 __m128d jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
948 int vdwjidx2A,vdwjidx2B;
949 __m128d jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
950 __m128d dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
951 __m128d dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
952 __m128d dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
953 __m128d dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
954 __m128d dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
955 __m128d dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
956 __m128d dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
957 __m128d dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
958 __m128d dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
959 __m128d velec,felec,velecsum,facel,crf,krf,krf2;
962 __m128d rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
965 __m128d one_sixth = _mm_set1_pd(1.0/6.0);
966 __m128d one_twelfth = _mm_set1_pd(1.0/12.0);
967 __m128d dummy_mask,cutoff_mask;
968 __m128d signbit = gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
969 __m128d one = _mm_set1_pd(1.0);
970 __m128d two = _mm_set1_pd(2.0);
976 jindex = nlist->jindex;
978 shiftidx = nlist->shift;
980 shiftvec = fr->shift_vec[0];
981 fshift = fr->fshift[0];
982 facel = _mm_set1_pd(fr->epsfac);
983 charge = mdatoms->chargeA;
984 nvdwtype = fr->ntype;
986 vdwtype = mdatoms->typeA;
988 /* Setup water-specific parameters */
989 inr = nlist->iinr[0];
990 iq0 = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+0]));
991 iq1 = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+1]));
992 iq2 = _mm_mul_pd(facel,_mm_set1_pd(charge[inr+2]));
993 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
995 jq0 = _mm_set1_pd(charge[inr+0]);
996 jq1 = _mm_set1_pd(charge[inr+1]);
997 jq2 = _mm_set1_pd(charge[inr+2]);
998 vdwjidx0A = 2*vdwtype[inr+0];
999 qq00 = _mm_mul_pd(iq0,jq0);
1000 c6_00 = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A]);
1001 c12_00 = _mm_set1_pd(vdwparam[vdwioffset0+vdwjidx0A+1]);
1002 qq01 = _mm_mul_pd(iq0,jq1);
1003 qq02 = _mm_mul_pd(iq0,jq2);
1004 qq10 = _mm_mul_pd(iq1,jq0);
1005 qq11 = _mm_mul_pd(iq1,jq1);
1006 qq12 = _mm_mul_pd(iq1,jq2);
1007 qq20 = _mm_mul_pd(iq2,jq0);
1008 qq21 = _mm_mul_pd(iq2,jq1);
1009 qq22 = _mm_mul_pd(iq2,jq2);
1011 /* Avoid stupid compiler warnings */
1013 j_coord_offsetA = 0;
1014 j_coord_offsetB = 0;
1019 /* Start outer loop over neighborlists */
1020 for(iidx=0; iidx<nri; iidx++)
1022 /* Load shift vector for this list */
1023 i_shift_offset = DIM*shiftidx[iidx];
1025 /* Load limits for loop over neighbors */
1026 j_index_start = jindex[iidx];
1027 j_index_end = jindex[iidx+1];
1029 /* Get outer coordinate index */
1031 i_coord_offset = DIM*inr;
1033 /* Load i particle coords and add shift vector */
1034 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec+i_shift_offset,x+i_coord_offset,
1035 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1037 fix0 = _mm_setzero_pd();
1038 fiy0 = _mm_setzero_pd();
1039 fiz0 = _mm_setzero_pd();
1040 fix1 = _mm_setzero_pd();
1041 fiy1 = _mm_setzero_pd();
1042 fiz1 = _mm_setzero_pd();
1043 fix2 = _mm_setzero_pd();
1044 fiy2 = _mm_setzero_pd();
1045 fiz2 = _mm_setzero_pd();
1047 /* Start inner kernel loop */
1048 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1051 /* Get j neighbor index, and coordinate index */
1053 jnrB = jjnr[jidx+1];
1054 j_coord_offsetA = DIM*jnrA;
1055 j_coord_offsetB = DIM*jnrB;
1057 /* load j atom coordinates */
1058 gmx_mm_load_3rvec_2ptr_swizzle_pd(x+j_coord_offsetA,x+j_coord_offsetB,
1059 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1061 /* Calculate displacement vector */
1062 dx00 = _mm_sub_pd(ix0,jx0);
1063 dy00 = _mm_sub_pd(iy0,jy0);
1064 dz00 = _mm_sub_pd(iz0,jz0);
1065 dx01 = _mm_sub_pd(ix0,jx1);
1066 dy01 = _mm_sub_pd(iy0,jy1);
1067 dz01 = _mm_sub_pd(iz0,jz1);
1068 dx02 = _mm_sub_pd(ix0,jx2);
1069 dy02 = _mm_sub_pd(iy0,jy2);
1070 dz02 = _mm_sub_pd(iz0,jz2);
1071 dx10 = _mm_sub_pd(ix1,jx0);
1072 dy10 = _mm_sub_pd(iy1,jy0);
1073 dz10 = _mm_sub_pd(iz1,jz0);
1074 dx11 = _mm_sub_pd(ix1,jx1);
1075 dy11 = _mm_sub_pd(iy1,jy1);
1076 dz11 = _mm_sub_pd(iz1,jz1);
1077 dx12 = _mm_sub_pd(ix1,jx2);
1078 dy12 = _mm_sub_pd(iy1,jy2);
1079 dz12 = _mm_sub_pd(iz1,jz2);
1080 dx20 = _mm_sub_pd(ix2,jx0);
1081 dy20 = _mm_sub_pd(iy2,jy0);
1082 dz20 = _mm_sub_pd(iz2,jz0);
1083 dx21 = _mm_sub_pd(ix2,jx1);
1084 dy21 = _mm_sub_pd(iy2,jy1);
1085 dz21 = _mm_sub_pd(iz2,jz1);
1086 dx22 = _mm_sub_pd(ix2,jx2);
1087 dy22 = _mm_sub_pd(iy2,jy2);
1088 dz22 = _mm_sub_pd(iz2,jz2);
1090 /* Calculate squared distance and things based on it */
1091 rsq00 = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1092 rsq01 = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
1093 rsq02 = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
1094 rsq10 = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
1095 rsq11 = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1096 rsq12 = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1097 rsq20 = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
1098 rsq21 = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1099 rsq22 = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1101 rinv00 = gmx_mm_invsqrt_pd(rsq00);
1102 rinv01 = gmx_mm_invsqrt_pd(rsq01);
1103 rinv02 = gmx_mm_invsqrt_pd(rsq02);
1104 rinv10 = gmx_mm_invsqrt_pd(rsq10);
1105 rinv11 = gmx_mm_invsqrt_pd(rsq11);
1106 rinv12 = gmx_mm_invsqrt_pd(rsq12);
1107 rinv20 = gmx_mm_invsqrt_pd(rsq20);
1108 rinv21 = gmx_mm_invsqrt_pd(rsq21);
1109 rinv22 = gmx_mm_invsqrt_pd(rsq22);
1111 rinvsq00 = _mm_mul_pd(rinv00,rinv00);
1112 rinvsq01 = _mm_mul_pd(rinv01,rinv01);
1113 rinvsq02 = _mm_mul_pd(rinv02,rinv02);
1114 rinvsq10 = _mm_mul_pd(rinv10,rinv10);
1115 rinvsq11 = _mm_mul_pd(rinv11,rinv11);
1116 rinvsq12 = _mm_mul_pd(rinv12,rinv12);
1117 rinvsq20 = _mm_mul_pd(rinv20,rinv20);
1118 rinvsq21 = _mm_mul_pd(rinv21,rinv21);
1119 rinvsq22 = _mm_mul_pd(rinv22,rinv22);
1121 fjx0 = _mm_setzero_pd();
1122 fjy0 = _mm_setzero_pd();
1123 fjz0 = _mm_setzero_pd();
1124 fjx1 = _mm_setzero_pd();
1125 fjy1 = _mm_setzero_pd();
1126 fjz1 = _mm_setzero_pd();
1127 fjx2 = _mm_setzero_pd();
1128 fjy2 = _mm_setzero_pd();
1129 fjz2 = _mm_setzero_pd();
1131 /**************************
1132 * CALCULATE INTERACTIONS *
1133 **************************/
1135 /* COULOMB ELECTROSTATICS */
1136 velec = _mm_mul_pd(qq00,rinv00);
1137 felec = _mm_mul_pd(velec,rinvsq00);
1139 /* LENNARD-JONES DISPERSION/REPULSION */
1141 rinvsix = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1142 fvdw = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1144 fscal = _mm_add_pd(felec,fvdw);
1146 /* Calculate temporary vectorial force */
1147 tx = _mm_mul_pd(fscal,dx00);
1148 ty = _mm_mul_pd(fscal,dy00);
1149 tz = _mm_mul_pd(fscal,dz00);
1151 /* Update vectorial force */
1152 fix0 = _mm_add_pd(fix0,tx);
1153 fiy0 = _mm_add_pd(fiy0,ty);
1154 fiz0 = _mm_add_pd(fiz0,tz);
1156 fjx0 = _mm_add_pd(fjx0,tx);
1157 fjy0 = _mm_add_pd(fjy0,ty);
1158 fjz0 = _mm_add_pd(fjz0,tz);
1160 /**************************
1161 * CALCULATE INTERACTIONS *
1162 **************************/
1164 /* COULOMB ELECTROSTATICS */
1165 velec = _mm_mul_pd(qq01,rinv01);
1166 felec = _mm_mul_pd(velec,rinvsq01);
1170 /* Calculate temporary vectorial force */
1171 tx = _mm_mul_pd(fscal,dx01);
1172 ty = _mm_mul_pd(fscal,dy01);
1173 tz = _mm_mul_pd(fscal,dz01);
1175 /* Update vectorial force */
1176 fix0 = _mm_add_pd(fix0,tx);
1177 fiy0 = _mm_add_pd(fiy0,ty);
1178 fiz0 = _mm_add_pd(fiz0,tz);
1180 fjx1 = _mm_add_pd(fjx1,tx);
1181 fjy1 = _mm_add_pd(fjy1,ty);
1182 fjz1 = _mm_add_pd(fjz1,tz);
1184 /**************************
1185 * CALCULATE INTERACTIONS *
1186 **************************/
1188 /* COULOMB ELECTROSTATICS */
1189 velec = _mm_mul_pd(qq02,rinv02);
1190 felec = _mm_mul_pd(velec,rinvsq02);
1194 /* Calculate temporary vectorial force */
1195 tx = _mm_mul_pd(fscal,dx02);
1196 ty = _mm_mul_pd(fscal,dy02);
1197 tz = _mm_mul_pd(fscal,dz02);
1199 /* Update vectorial force */
1200 fix0 = _mm_add_pd(fix0,tx);
1201 fiy0 = _mm_add_pd(fiy0,ty);
1202 fiz0 = _mm_add_pd(fiz0,tz);
1204 fjx2 = _mm_add_pd(fjx2,tx);
1205 fjy2 = _mm_add_pd(fjy2,ty);
1206 fjz2 = _mm_add_pd(fjz2,tz);
1208 /**************************
1209 * CALCULATE INTERACTIONS *
1210 **************************/
1212 /* COULOMB ELECTROSTATICS */
1213 velec = _mm_mul_pd(qq10,rinv10);
1214 felec = _mm_mul_pd(velec,rinvsq10);
1218 /* Calculate temporary vectorial force */
1219 tx = _mm_mul_pd(fscal,dx10);
1220 ty = _mm_mul_pd(fscal,dy10);
1221 tz = _mm_mul_pd(fscal,dz10);
1223 /* Update vectorial force */
1224 fix1 = _mm_add_pd(fix1,tx);
1225 fiy1 = _mm_add_pd(fiy1,ty);
1226 fiz1 = _mm_add_pd(fiz1,tz);
1228 fjx0 = _mm_add_pd(fjx0,tx);
1229 fjy0 = _mm_add_pd(fjy0,ty);
1230 fjz0 = _mm_add_pd(fjz0,tz);
1232 /**************************
1233 * CALCULATE INTERACTIONS *
1234 **************************/
1236 /* COULOMB ELECTROSTATICS */
1237 velec = _mm_mul_pd(qq11,rinv11);
1238 felec = _mm_mul_pd(velec,rinvsq11);
1242 /* Calculate temporary vectorial force */
1243 tx = _mm_mul_pd(fscal,dx11);
1244 ty = _mm_mul_pd(fscal,dy11);
1245 tz = _mm_mul_pd(fscal,dz11);
1247 /* Update vectorial force */
1248 fix1 = _mm_add_pd(fix1,tx);
1249 fiy1 = _mm_add_pd(fiy1,ty);
1250 fiz1 = _mm_add_pd(fiz1,tz);
1252 fjx1 = _mm_add_pd(fjx1,tx);
1253 fjy1 = _mm_add_pd(fjy1,ty);
1254 fjz1 = _mm_add_pd(fjz1,tz);
1256 /**************************
1257 * CALCULATE INTERACTIONS *
1258 **************************/
1260 /* COULOMB ELECTROSTATICS */
1261 velec = _mm_mul_pd(qq12,rinv12);
1262 felec = _mm_mul_pd(velec,rinvsq12);
1266 /* Calculate temporary vectorial force */
1267 tx = _mm_mul_pd(fscal,dx12);
1268 ty = _mm_mul_pd(fscal,dy12);
1269 tz = _mm_mul_pd(fscal,dz12);
1271 /* Update vectorial force */
1272 fix1 = _mm_add_pd(fix1,tx);
1273 fiy1 = _mm_add_pd(fiy1,ty);
1274 fiz1 = _mm_add_pd(fiz1,tz);
1276 fjx2 = _mm_add_pd(fjx2,tx);
1277 fjy2 = _mm_add_pd(fjy2,ty);
1278 fjz2 = _mm_add_pd(fjz2,tz);
1280 /**************************
1281 * CALCULATE INTERACTIONS *
1282 **************************/
1284 /* COULOMB ELECTROSTATICS */
1285 velec = _mm_mul_pd(qq20,rinv20);
1286 felec = _mm_mul_pd(velec,rinvsq20);
1290 /* Calculate temporary vectorial force */
1291 tx = _mm_mul_pd(fscal,dx20);
1292 ty = _mm_mul_pd(fscal,dy20);
1293 tz = _mm_mul_pd(fscal,dz20);
1295 /* Update vectorial force */
1296 fix2 = _mm_add_pd(fix2,tx);
1297 fiy2 = _mm_add_pd(fiy2,ty);
1298 fiz2 = _mm_add_pd(fiz2,tz);
1300 fjx0 = _mm_add_pd(fjx0,tx);
1301 fjy0 = _mm_add_pd(fjy0,ty);
1302 fjz0 = _mm_add_pd(fjz0,tz);
1304 /**************************
1305 * CALCULATE INTERACTIONS *
1306 **************************/
1308 /* COULOMB ELECTROSTATICS */
1309 velec = _mm_mul_pd(qq21,rinv21);
1310 felec = _mm_mul_pd(velec,rinvsq21);
1314 /* Calculate temporary vectorial force */
1315 tx = _mm_mul_pd(fscal,dx21);
1316 ty = _mm_mul_pd(fscal,dy21);
1317 tz = _mm_mul_pd(fscal,dz21);
1319 /* Update vectorial force */
1320 fix2 = _mm_add_pd(fix2,tx);
1321 fiy2 = _mm_add_pd(fiy2,ty);
1322 fiz2 = _mm_add_pd(fiz2,tz);
1324 fjx1 = _mm_add_pd(fjx1,tx);
1325 fjy1 = _mm_add_pd(fjy1,ty);
1326 fjz1 = _mm_add_pd(fjz1,tz);
1328 /**************************
1329 * CALCULATE INTERACTIONS *
1330 **************************/
1332 /* COULOMB ELECTROSTATICS */
1333 velec = _mm_mul_pd(qq22,rinv22);
1334 felec = _mm_mul_pd(velec,rinvsq22);
1338 /* Calculate temporary vectorial force */
1339 tx = _mm_mul_pd(fscal,dx22);
1340 ty = _mm_mul_pd(fscal,dy22);
1341 tz = _mm_mul_pd(fscal,dz22);
1343 /* Update vectorial force */
1344 fix2 = _mm_add_pd(fix2,tx);
1345 fiy2 = _mm_add_pd(fiy2,ty);
1346 fiz2 = _mm_add_pd(fiz2,tz);
1348 fjx2 = _mm_add_pd(fjx2,tx);
1349 fjy2 = _mm_add_pd(fjy2,ty);
1350 fjz2 = _mm_add_pd(fjz2,tz);
1352 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1354 /* Inner loop uses 250 flops */
1357 if(jidx<j_index_end)
1361 j_coord_offsetA = DIM*jnrA;
1363 /* load j atom coordinates */
1364 gmx_mm_load_3rvec_1ptr_swizzle_pd(x+j_coord_offsetA,
1365 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1367 /* Calculate displacement vector */
1368 dx00 = _mm_sub_pd(ix0,jx0);
1369 dy00 = _mm_sub_pd(iy0,jy0);
1370 dz00 = _mm_sub_pd(iz0,jz0);
1371 dx01 = _mm_sub_pd(ix0,jx1);
1372 dy01 = _mm_sub_pd(iy0,jy1);
1373 dz01 = _mm_sub_pd(iz0,jz1);
1374 dx02 = _mm_sub_pd(ix0,jx2);
1375 dy02 = _mm_sub_pd(iy0,jy2);
1376 dz02 = _mm_sub_pd(iz0,jz2);
1377 dx10 = _mm_sub_pd(ix1,jx0);
1378 dy10 = _mm_sub_pd(iy1,jy0);
1379 dz10 = _mm_sub_pd(iz1,jz0);
1380 dx11 = _mm_sub_pd(ix1,jx1);
1381 dy11 = _mm_sub_pd(iy1,jy1);
1382 dz11 = _mm_sub_pd(iz1,jz1);
1383 dx12 = _mm_sub_pd(ix1,jx2);
1384 dy12 = _mm_sub_pd(iy1,jy2);
1385 dz12 = _mm_sub_pd(iz1,jz2);
1386 dx20 = _mm_sub_pd(ix2,jx0);
1387 dy20 = _mm_sub_pd(iy2,jy0);
1388 dz20 = _mm_sub_pd(iz2,jz0);
1389 dx21 = _mm_sub_pd(ix2,jx1);
1390 dy21 = _mm_sub_pd(iy2,jy1);
1391 dz21 = _mm_sub_pd(iz2,jz1);
1392 dx22 = _mm_sub_pd(ix2,jx2);
1393 dy22 = _mm_sub_pd(iy2,jy2);
1394 dz22 = _mm_sub_pd(iz2,jz2);
1396 /* Calculate squared distance and things based on it */
1397 rsq00 = gmx_mm_calc_rsq_pd(dx00,dy00,dz00);
1398 rsq01 = gmx_mm_calc_rsq_pd(dx01,dy01,dz01);
1399 rsq02 = gmx_mm_calc_rsq_pd(dx02,dy02,dz02);
1400 rsq10 = gmx_mm_calc_rsq_pd(dx10,dy10,dz10);
1401 rsq11 = gmx_mm_calc_rsq_pd(dx11,dy11,dz11);
1402 rsq12 = gmx_mm_calc_rsq_pd(dx12,dy12,dz12);
1403 rsq20 = gmx_mm_calc_rsq_pd(dx20,dy20,dz20);
1404 rsq21 = gmx_mm_calc_rsq_pd(dx21,dy21,dz21);
1405 rsq22 = gmx_mm_calc_rsq_pd(dx22,dy22,dz22);
1407 rinv00 = gmx_mm_invsqrt_pd(rsq00);
1408 rinv01 = gmx_mm_invsqrt_pd(rsq01);
1409 rinv02 = gmx_mm_invsqrt_pd(rsq02);
1410 rinv10 = gmx_mm_invsqrt_pd(rsq10);
1411 rinv11 = gmx_mm_invsqrt_pd(rsq11);
1412 rinv12 = gmx_mm_invsqrt_pd(rsq12);
1413 rinv20 = gmx_mm_invsqrt_pd(rsq20);
1414 rinv21 = gmx_mm_invsqrt_pd(rsq21);
1415 rinv22 = gmx_mm_invsqrt_pd(rsq22);
1417 rinvsq00 = _mm_mul_pd(rinv00,rinv00);
1418 rinvsq01 = _mm_mul_pd(rinv01,rinv01);
1419 rinvsq02 = _mm_mul_pd(rinv02,rinv02);
1420 rinvsq10 = _mm_mul_pd(rinv10,rinv10);
1421 rinvsq11 = _mm_mul_pd(rinv11,rinv11);
1422 rinvsq12 = _mm_mul_pd(rinv12,rinv12);
1423 rinvsq20 = _mm_mul_pd(rinv20,rinv20);
1424 rinvsq21 = _mm_mul_pd(rinv21,rinv21);
1425 rinvsq22 = _mm_mul_pd(rinv22,rinv22);
1427 fjx0 = _mm_setzero_pd();
1428 fjy0 = _mm_setzero_pd();
1429 fjz0 = _mm_setzero_pd();
1430 fjx1 = _mm_setzero_pd();
1431 fjy1 = _mm_setzero_pd();
1432 fjz1 = _mm_setzero_pd();
1433 fjx2 = _mm_setzero_pd();
1434 fjy2 = _mm_setzero_pd();
1435 fjz2 = _mm_setzero_pd();
1437 /**************************
1438 * CALCULATE INTERACTIONS *
1439 **************************/
1441 /* COULOMB ELECTROSTATICS */
1442 velec = _mm_mul_pd(qq00,rinv00);
1443 felec = _mm_mul_pd(velec,rinvsq00);
1445 /* LENNARD-JONES DISPERSION/REPULSION */
1447 rinvsix = _mm_mul_pd(_mm_mul_pd(rinvsq00,rinvsq00),rinvsq00);
1448 fvdw = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(c12_00,rinvsix),c6_00),_mm_mul_pd(rinvsix,rinvsq00));
1450 fscal = _mm_add_pd(felec,fvdw);
1452 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1454 /* Calculate temporary vectorial force */
1455 tx = _mm_mul_pd(fscal,dx00);
1456 ty = _mm_mul_pd(fscal,dy00);
1457 tz = _mm_mul_pd(fscal,dz00);
1459 /* Update vectorial force */
1460 fix0 = _mm_add_pd(fix0,tx);
1461 fiy0 = _mm_add_pd(fiy0,ty);
1462 fiz0 = _mm_add_pd(fiz0,tz);
1464 fjx0 = _mm_add_pd(fjx0,tx);
1465 fjy0 = _mm_add_pd(fjy0,ty);
1466 fjz0 = _mm_add_pd(fjz0,tz);
1468 /**************************
1469 * CALCULATE INTERACTIONS *
1470 **************************/
1472 /* COULOMB ELECTROSTATICS */
1473 velec = _mm_mul_pd(qq01,rinv01);
1474 felec = _mm_mul_pd(velec,rinvsq01);
1478 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1480 /* Calculate temporary vectorial force */
1481 tx = _mm_mul_pd(fscal,dx01);
1482 ty = _mm_mul_pd(fscal,dy01);
1483 tz = _mm_mul_pd(fscal,dz01);
1485 /* Update vectorial force */
1486 fix0 = _mm_add_pd(fix0,tx);
1487 fiy0 = _mm_add_pd(fiy0,ty);
1488 fiz0 = _mm_add_pd(fiz0,tz);
1490 fjx1 = _mm_add_pd(fjx1,tx);
1491 fjy1 = _mm_add_pd(fjy1,ty);
1492 fjz1 = _mm_add_pd(fjz1,tz);
1494 /**************************
1495 * CALCULATE INTERACTIONS *
1496 **************************/
1498 /* COULOMB ELECTROSTATICS */
1499 velec = _mm_mul_pd(qq02,rinv02);
1500 felec = _mm_mul_pd(velec,rinvsq02);
1504 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1506 /* Calculate temporary vectorial force */
1507 tx = _mm_mul_pd(fscal,dx02);
1508 ty = _mm_mul_pd(fscal,dy02);
1509 tz = _mm_mul_pd(fscal,dz02);
1511 /* Update vectorial force */
1512 fix0 = _mm_add_pd(fix0,tx);
1513 fiy0 = _mm_add_pd(fiy0,ty);
1514 fiz0 = _mm_add_pd(fiz0,tz);
1516 fjx2 = _mm_add_pd(fjx2,tx);
1517 fjy2 = _mm_add_pd(fjy2,ty);
1518 fjz2 = _mm_add_pd(fjz2,tz);
1520 /**************************
1521 * CALCULATE INTERACTIONS *
1522 **************************/
1524 /* COULOMB ELECTROSTATICS */
1525 velec = _mm_mul_pd(qq10,rinv10);
1526 felec = _mm_mul_pd(velec,rinvsq10);
1530 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1532 /* Calculate temporary vectorial force */
1533 tx = _mm_mul_pd(fscal,dx10);
1534 ty = _mm_mul_pd(fscal,dy10);
1535 tz = _mm_mul_pd(fscal,dz10);
1537 /* Update vectorial force */
1538 fix1 = _mm_add_pd(fix1,tx);
1539 fiy1 = _mm_add_pd(fiy1,ty);
1540 fiz1 = _mm_add_pd(fiz1,tz);
1542 fjx0 = _mm_add_pd(fjx0,tx);
1543 fjy0 = _mm_add_pd(fjy0,ty);
1544 fjz0 = _mm_add_pd(fjz0,tz);
1546 /**************************
1547 * CALCULATE INTERACTIONS *
1548 **************************/
1550 /* COULOMB ELECTROSTATICS */
1551 velec = _mm_mul_pd(qq11,rinv11);
1552 felec = _mm_mul_pd(velec,rinvsq11);
1556 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1558 /* Calculate temporary vectorial force */
1559 tx = _mm_mul_pd(fscal,dx11);
1560 ty = _mm_mul_pd(fscal,dy11);
1561 tz = _mm_mul_pd(fscal,dz11);
1563 /* Update vectorial force */
1564 fix1 = _mm_add_pd(fix1,tx);
1565 fiy1 = _mm_add_pd(fiy1,ty);
1566 fiz1 = _mm_add_pd(fiz1,tz);
1568 fjx1 = _mm_add_pd(fjx1,tx);
1569 fjy1 = _mm_add_pd(fjy1,ty);
1570 fjz1 = _mm_add_pd(fjz1,tz);
1572 /**************************
1573 * CALCULATE INTERACTIONS *
1574 **************************/
1576 /* COULOMB ELECTROSTATICS */
1577 velec = _mm_mul_pd(qq12,rinv12);
1578 felec = _mm_mul_pd(velec,rinvsq12);
1582 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1584 /* Calculate temporary vectorial force */
1585 tx = _mm_mul_pd(fscal,dx12);
1586 ty = _mm_mul_pd(fscal,dy12);
1587 tz = _mm_mul_pd(fscal,dz12);
1589 /* Update vectorial force */
1590 fix1 = _mm_add_pd(fix1,tx);
1591 fiy1 = _mm_add_pd(fiy1,ty);
1592 fiz1 = _mm_add_pd(fiz1,tz);
1594 fjx2 = _mm_add_pd(fjx2,tx);
1595 fjy2 = _mm_add_pd(fjy2,ty);
1596 fjz2 = _mm_add_pd(fjz2,tz);
1598 /**************************
1599 * CALCULATE INTERACTIONS *
1600 **************************/
1602 /* COULOMB ELECTROSTATICS */
1603 velec = _mm_mul_pd(qq20,rinv20);
1604 felec = _mm_mul_pd(velec,rinvsq20);
1608 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1610 /* Calculate temporary vectorial force */
1611 tx = _mm_mul_pd(fscal,dx20);
1612 ty = _mm_mul_pd(fscal,dy20);
1613 tz = _mm_mul_pd(fscal,dz20);
1615 /* Update vectorial force */
1616 fix2 = _mm_add_pd(fix2,tx);
1617 fiy2 = _mm_add_pd(fiy2,ty);
1618 fiz2 = _mm_add_pd(fiz2,tz);
1620 fjx0 = _mm_add_pd(fjx0,tx);
1621 fjy0 = _mm_add_pd(fjy0,ty);
1622 fjz0 = _mm_add_pd(fjz0,tz);
1624 /**************************
1625 * CALCULATE INTERACTIONS *
1626 **************************/
1628 /* COULOMB ELECTROSTATICS */
1629 velec = _mm_mul_pd(qq21,rinv21);
1630 felec = _mm_mul_pd(velec,rinvsq21);
1634 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1636 /* Calculate temporary vectorial force */
1637 tx = _mm_mul_pd(fscal,dx21);
1638 ty = _mm_mul_pd(fscal,dy21);
1639 tz = _mm_mul_pd(fscal,dz21);
1641 /* Update vectorial force */
1642 fix2 = _mm_add_pd(fix2,tx);
1643 fiy2 = _mm_add_pd(fiy2,ty);
1644 fiz2 = _mm_add_pd(fiz2,tz);
1646 fjx1 = _mm_add_pd(fjx1,tx);
1647 fjy1 = _mm_add_pd(fjy1,ty);
1648 fjz1 = _mm_add_pd(fjz1,tz);
1650 /**************************
1651 * CALCULATE INTERACTIONS *
1652 **************************/
1654 /* COULOMB ELECTROSTATICS */
1655 velec = _mm_mul_pd(qq22,rinv22);
1656 felec = _mm_mul_pd(velec,rinvsq22);
1660 fscal = _mm_unpacklo_pd(fscal,_mm_setzero_pd());
1662 /* Calculate temporary vectorial force */
1663 tx = _mm_mul_pd(fscal,dx22);
1664 ty = _mm_mul_pd(fscal,dy22);
1665 tz = _mm_mul_pd(fscal,dz22);
1667 /* Update vectorial force */
1668 fix2 = _mm_add_pd(fix2,tx);
1669 fiy2 = _mm_add_pd(fiy2,ty);
1670 fiz2 = _mm_add_pd(fiz2,tz);
1672 fjx2 = _mm_add_pd(fjx2,tx);
1673 fjy2 = _mm_add_pd(fjy2,ty);
1674 fjz2 = _mm_add_pd(fjz2,tz);
1676 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1678 /* Inner loop uses 250 flops */
1681 /* End of innermost loop */
1683 gmx_mm_update_iforce_3atom_swizzle_pd(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1684 f+i_coord_offset,fshift+i_shift_offset);
1686 /* Increment number of inner iterations */
1687 inneriter += j_index_end - j_index_start;
1689 /* Outer loop uses 18 flops */
1692 /* Increment number of outer iterations */
1695 /* Update outer/inner flops */
1697 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*250);