Bug Summary

File:gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sse4_1_single.c
Location:line 1081, column 5
Description:Value stored to 'jnrA' is never read

Annotated Source Code

1/*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
8 *
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
13 *
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 *
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
31 *
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
34 */
35/*
36 * Note: this file was generated by the GROMACS sse4_1_single kernel generator.
37 */
38#ifdef HAVE_CONFIG_H1
39#include <config.h>
40#endif
41
42#include <math.h>
43
44#include "../nb_kernel.h"
45#include "types/simple.h"
46#include "gromacs/math/vec.h"
47#include "nrnb.h"
48
49#include "gromacs/simd/math_x86_sse4_1_single.h"
50#include "kernelutil_x86_sse4_1_single.h"
51
52/*
53 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_single
54 * Electrostatics interaction: ReactionField
55 * VdW interaction: LennardJones
56 * Geometry: Water3-Water3
57 * Calculate force/pot: PotentialAndForce
58 */
59void
60nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sse4_1_single
61 (t_nblist * gmx_restrict nlist,
62 rvec * gmx_restrict xx,
63 rvec * gmx_restrict ff,
64 t_forcerec * gmx_restrict fr,
65 t_mdatoms * gmx_restrict mdatoms,
66 nb_kernel_data_t gmx_unused__attribute__ ((unused)) * gmx_restrict kernel_data,
67 t_nrnb * gmx_restrict nrnb)
68{
69 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
70 * just 0 for non-waters.
71 * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
72 * jnr indices corresponding to data put in the four positions in the SIMD register.
73 */
74 int i_shift_offset,i_coord_offset,outeriter,inneriter;
75 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int jnrA,jnrB,jnrC,jnrD;
77 int jnrlistA,jnrlistB,jnrlistC,jnrlistD;
78 int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
79 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
80 real rcutoff_scalar;
81 real *shiftvec,*fshift,*x,*f;
82 real *fjptrA,*fjptrB,*fjptrC,*fjptrD;
83 real scratch[4*DIM3];
84 __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
85 int vdwioffset0;
86 __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
87 int vdwioffset1;
88 __m128 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
89 int vdwioffset2;
90 __m128 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
91 int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
92 __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
93 int vdwjidx1A,vdwjidx1B,vdwjidx1C,vdwjidx1D;
94 __m128 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
95 int vdwjidx2A,vdwjidx2B,vdwjidx2C,vdwjidx2D;
96 __m128 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
97 __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98 __m128 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
99 __m128 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
100 __m128 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
101 __m128 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
102 __m128 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
103 __m128 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
104 __m128 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
105 __m128 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
106 __m128 velec,felec,velecsum,facel,crf,krf,krf2;
107 real *charge;
108 int nvdwtype;
109 __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
110 int *vdwtype;
111 real *vdwparam;
112 __m128 one_sixth = _mm_set1_ps(1.0/6.0);
113 __m128 one_twelfth = _mm_set1_ps(1.0/12.0);
114 __m128 dummy_mask,cutoff_mask;
115 __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
116 __m128 one = _mm_set1_ps(1.0);
117 __m128 two = _mm_set1_ps(2.0);
118 x = xx[0];
119 f = ff[0];
120
121 nri = nlist->nri;
122 iinr = nlist->iinr;
123 jindex = nlist->jindex;
124 jjnr = nlist->jjnr;
125 shiftidx = nlist->shift;
126 gid = nlist->gid;
127 shiftvec = fr->shift_vec[0];
128 fshift = fr->fshift[0];
129 facel = _mm_set1_ps(fr->epsfac);
130 charge = mdatoms->chargeA;
131 krf = _mm_set1_ps(fr->ic->k_rf);
132 krf2 = _mm_set1_ps(fr->ic->k_rf*2.0);
133 crf = _mm_set1_ps(fr->ic->c_rf);
134 nvdwtype = fr->ntype;
135 vdwparam = fr->nbfp;
136 vdwtype = mdatoms->typeA;
137
138 /* Setup water-specific parameters */
139 inr = nlist->iinr[0];
140 iq0 = _mm_mul_ps(facel,_mm_set1_ps(charge[inr+0]));
141 iq1 = _mm_mul_ps(facel,_mm_set1_ps(charge[inr+1]));
142 iq2 = _mm_mul_ps(facel,_mm_set1_ps(charge[inr+2]));
143 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
144
145 jq0 = _mm_set1_ps(charge[inr+0]);
146 jq1 = _mm_set1_ps(charge[inr+1]);
147 jq2 = _mm_set1_ps(charge[inr+2]);
148 vdwjidx0A = 2*vdwtype[inr+0];
149 qq00 = _mm_mul_ps(iq0,jq0);
150 c6_00 = _mm_set1_ps(vdwparam[vdwioffset0+vdwjidx0A]);
151 c12_00 = _mm_set1_ps(vdwparam[vdwioffset0+vdwjidx0A+1]);
152 qq01 = _mm_mul_ps(iq0,jq1);
153 qq02 = _mm_mul_ps(iq0,jq2);
154 qq10 = _mm_mul_ps(iq1,jq0);
155 qq11 = _mm_mul_ps(iq1,jq1);
156 qq12 = _mm_mul_ps(iq1,jq2);
157 qq20 = _mm_mul_ps(iq2,jq0);
158 qq21 = _mm_mul_ps(iq2,jq1);
159 qq22 = _mm_mul_ps(iq2,jq2);
160
161 /* Avoid stupid compiler warnings */
162 jnrA = jnrB = jnrC = jnrD = 0;
163 j_coord_offsetA = 0;
164 j_coord_offsetB = 0;
165 j_coord_offsetC = 0;
166 j_coord_offsetD = 0;
167
168 outeriter = 0;
169 inneriter = 0;
170
171 for(iidx=0;iidx<4*DIM3;iidx++)
172 {
173 scratch[iidx] = 0.0;
174 }
175
176 /* Start outer loop over neighborlists */
177 for(iidx=0; iidx<nri; iidx++)
178 {
179 /* Load shift vector for this list */
180 i_shift_offset = DIM3*shiftidx[iidx];
181
182 /* Load limits for loop over neighbors */
183 j_index_start = jindex[iidx];
184 j_index_end = jindex[iidx+1];
185
186 /* Get outer coordinate index */
187 inr = iinr[iidx];
188 i_coord_offset = DIM3*inr;
189
190 /* Load i particle coords and add shift vector */
191 gmx_mm_load_shift_and_3rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,
192 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
193
194 fix0 = _mm_setzero_ps();
195 fiy0 = _mm_setzero_ps();
196 fiz0 = _mm_setzero_ps();
197 fix1 = _mm_setzero_ps();
198 fiy1 = _mm_setzero_ps();
199 fiz1 = _mm_setzero_ps();
200 fix2 = _mm_setzero_ps();
201 fiy2 = _mm_setzero_ps();
202 fiz2 = _mm_setzero_ps();
203
204 /* Reset potential sums */
205 velecsum = _mm_setzero_ps();
206 vvdwsum = _mm_setzero_ps();
207
208 /* Start inner kernel loop */
209 for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
210 {
211
212 /* Get j neighbor index, and coordinate index */
213 jnrA = jjnr[jidx];
214 jnrB = jjnr[jidx+1];
215 jnrC = jjnr[jidx+2];
216 jnrD = jjnr[jidx+3];
217 j_coord_offsetA = DIM3*jnrA;
218 j_coord_offsetB = DIM3*jnrB;
219 j_coord_offsetC = DIM3*jnrC;
220 j_coord_offsetD = DIM3*jnrD;
221
222 /* load j atom coordinates */
223 gmx_mm_load_3rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
224 x+j_coord_offsetC,x+j_coord_offsetD,
225 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
226
227 /* Calculate displacement vector */
228 dx00 = _mm_sub_ps(ix0,jx0);
229 dy00 = _mm_sub_ps(iy0,jy0);
230 dz00 = _mm_sub_ps(iz0,jz0);
231 dx01 = _mm_sub_ps(ix0,jx1);
232 dy01 = _mm_sub_ps(iy0,jy1);
233 dz01 = _mm_sub_ps(iz0,jz1);
234 dx02 = _mm_sub_ps(ix0,jx2);
235 dy02 = _mm_sub_ps(iy0,jy2);
236 dz02 = _mm_sub_ps(iz0,jz2);
237 dx10 = _mm_sub_ps(ix1,jx0);
238 dy10 = _mm_sub_ps(iy1,jy0);
239 dz10 = _mm_sub_ps(iz1,jz0);
240 dx11 = _mm_sub_ps(ix1,jx1);
241 dy11 = _mm_sub_ps(iy1,jy1);
242 dz11 = _mm_sub_ps(iz1,jz1);
243 dx12 = _mm_sub_ps(ix1,jx2);
244 dy12 = _mm_sub_ps(iy1,jy2);
245 dz12 = _mm_sub_ps(iz1,jz2);
246 dx20 = _mm_sub_ps(ix2,jx0);
247 dy20 = _mm_sub_ps(iy2,jy0);
248 dz20 = _mm_sub_ps(iz2,jz0);
249 dx21 = _mm_sub_ps(ix2,jx1);
250 dy21 = _mm_sub_ps(iy2,jy1);
251 dz21 = _mm_sub_ps(iz2,jz1);
252 dx22 = _mm_sub_ps(ix2,jx2);
253 dy22 = _mm_sub_ps(iy2,jy2);
254 dz22 = _mm_sub_ps(iz2,jz2);
255
256 /* Calculate squared distance and things based on it */
257 rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
258 rsq01 = gmx_mm_calc_rsq_ps(dx01,dy01,dz01);
259 rsq02 = gmx_mm_calc_rsq_ps(dx02,dy02,dz02);
260 rsq10 = gmx_mm_calc_rsq_ps(dx10,dy10,dz10);
261 rsq11 = gmx_mm_calc_rsq_ps(dx11,dy11,dz11);
262 rsq12 = gmx_mm_calc_rsq_ps(dx12,dy12,dz12);
263 rsq20 = gmx_mm_calc_rsq_ps(dx20,dy20,dz20);
264 rsq21 = gmx_mm_calc_rsq_ps(dx21,dy21,dz21);
265 rsq22 = gmx_mm_calc_rsq_ps(dx22,dy22,dz22);
266
267 rinv00 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq00);
268 rinv01 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq01);
269 rinv02 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq02);
270 rinv10 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq10);
271 rinv11 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq11);
272 rinv12 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq12);
273 rinv20 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq20);
274 rinv21 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq21);
275 rinv22 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq22);
276
277 rinvsq00 = _mm_mul_ps(rinv00,rinv00);
278 rinvsq01 = _mm_mul_ps(rinv01,rinv01);
279 rinvsq02 = _mm_mul_ps(rinv02,rinv02);
280 rinvsq10 = _mm_mul_ps(rinv10,rinv10);
281 rinvsq11 = _mm_mul_ps(rinv11,rinv11);
282 rinvsq12 = _mm_mul_ps(rinv12,rinv12);
283 rinvsq20 = _mm_mul_ps(rinv20,rinv20);
284 rinvsq21 = _mm_mul_ps(rinv21,rinv21);
285 rinvsq22 = _mm_mul_ps(rinv22,rinv22);
286
287 fjx0 = _mm_setzero_ps();
288 fjy0 = _mm_setzero_ps();
289 fjz0 = _mm_setzero_ps();
290 fjx1 = _mm_setzero_ps();
291 fjy1 = _mm_setzero_ps();
292 fjz1 = _mm_setzero_ps();
293 fjx2 = _mm_setzero_ps();
294 fjy2 = _mm_setzero_ps();
295 fjz2 = _mm_setzero_ps();
296
297 /**************************
298 * CALCULATE INTERACTIONS *
299 **************************/
300
301 /* REACTION-FIELD ELECTROSTATICS */
302 velec = _mm_mul_ps(qq00,_mm_sub_ps(_mm_add_ps(rinv00,_mm_mul_ps(krf,rsq00)),crf));
303 felec = _mm_mul_ps(qq00,_mm_sub_ps(_mm_mul_ps(rinv00,rinvsq00),krf2));
304
305 /* LENNARD-JONES DISPERSION/REPULSION */
306
307 rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
308 vvdw6 = _mm_mul_ps(c6_00,rinvsix);
309 vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
310 vvdw = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) );
311 fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
312
313 /* Update potential sum for this i atom from the interaction with this j atom. */
314 velecsum = _mm_add_ps(velecsum,velec);
315 vvdwsum = _mm_add_ps(vvdwsum,vvdw);
316
317 fscal = _mm_add_ps(felec,fvdw);
318
319 /* Calculate temporary vectorial force */
320 tx = _mm_mul_ps(fscal,dx00);
321 ty = _mm_mul_ps(fscal,dy00);
322 tz = _mm_mul_ps(fscal,dz00);
323
324 /* Update vectorial force */
325 fix0 = _mm_add_ps(fix0,tx);
326 fiy0 = _mm_add_ps(fiy0,ty);
327 fiz0 = _mm_add_ps(fiz0,tz);
328
329 fjx0 = _mm_add_ps(fjx0,tx);
330 fjy0 = _mm_add_ps(fjy0,ty);
331 fjz0 = _mm_add_ps(fjz0,tz);
332
333 /**************************
334 * CALCULATE INTERACTIONS *
335 **************************/
336
337 /* REACTION-FIELD ELECTROSTATICS */
338 velec = _mm_mul_ps(qq01,_mm_sub_ps(_mm_add_ps(rinv01,_mm_mul_ps(krf,rsq01)),crf));
339 felec = _mm_mul_ps(qq01,_mm_sub_ps(_mm_mul_ps(rinv01,rinvsq01),krf2));
340
341 /* Update potential sum for this i atom from the interaction with this j atom. */
342 velecsum = _mm_add_ps(velecsum,velec);
343
344 fscal = felec;
345
346 /* Calculate temporary vectorial force */
347 tx = _mm_mul_ps(fscal,dx01);
348 ty = _mm_mul_ps(fscal,dy01);
349 tz = _mm_mul_ps(fscal,dz01);
350
351 /* Update vectorial force */
352 fix0 = _mm_add_ps(fix0,tx);
353 fiy0 = _mm_add_ps(fiy0,ty);
354 fiz0 = _mm_add_ps(fiz0,tz);
355
356 fjx1 = _mm_add_ps(fjx1,tx);
357 fjy1 = _mm_add_ps(fjy1,ty);
358 fjz1 = _mm_add_ps(fjz1,tz);
359
360 /**************************
361 * CALCULATE INTERACTIONS *
362 **************************/
363
364 /* REACTION-FIELD ELECTROSTATICS */
365 velec = _mm_mul_ps(qq02,_mm_sub_ps(_mm_add_ps(rinv02,_mm_mul_ps(krf,rsq02)),crf));
366 felec = _mm_mul_ps(qq02,_mm_sub_ps(_mm_mul_ps(rinv02,rinvsq02),krf2));
367
368 /* Update potential sum for this i atom from the interaction with this j atom. */
369 velecsum = _mm_add_ps(velecsum,velec);
370
371 fscal = felec;
372
373 /* Calculate temporary vectorial force */
374 tx = _mm_mul_ps(fscal,dx02);
375 ty = _mm_mul_ps(fscal,dy02);
376 tz = _mm_mul_ps(fscal,dz02);
377
378 /* Update vectorial force */
379 fix0 = _mm_add_ps(fix0,tx);
380 fiy0 = _mm_add_ps(fiy0,ty);
381 fiz0 = _mm_add_ps(fiz0,tz);
382
383 fjx2 = _mm_add_ps(fjx2,tx);
384 fjy2 = _mm_add_ps(fjy2,ty);
385 fjz2 = _mm_add_ps(fjz2,tz);
386
387 /**************************
388 * CALCULATE INTERACTIONS *
389 **************************/
390
391 /* REACTION-FIELD ELECTROSTATICS */
392 velec = _mm_mul_ps(qq10,_mm_sub_ps(_mm_add_ps(rinv10,_mm_mul_ps(krf,rsq10)),crf));
393 felec = _mm_mul_ps(qq10,_mm_sub_ps(_mm_mul_ps(rinv10,rinvsq10),krf2));
394
395 /* Update potential sum for this i atom from the interaction with this j atom. */
396 velecsum = _mm_add_ps(velecsum,velec);
397
398 fscal = felec;
399
400 /* Calculate temporary vectorial force */
401 tx = _mm_mul_ps(fscal,dx10);
402 ty = _mm_mul_ps(fscal,dy10);
403 tz = _mm_mul_ps(fscal,dz10);
404
405 /* Update vectorial force */
406 fix1 = _mm_add_ps(fix1,tx);
407 fiy1 = _mm_add_ps(fiy1,ty);
408 fiz1 = _mm_add_ps(fiz1,tz);
409
410 fjx0 = _mm_add_ps(fjx0,tx);
411 fjy0 = _mm_add_ps(fjy0,ty);
412 fjz0 = _mm_add_ps(fjz0,tz);
413
414 /**************************
415 * CALCULATE INTERACTIONS *
416 **************************/
417
418 /* REACTION-FIELD ELECTROSTATICS */
419 velec = _mm_mul_ps(qq11,_mm_sub_ps(_mm_add_ps(rinv11,_mm_mul_ps(krf,rsq11)),crf));
420 felec = _mm_mul_ps(qq11,_mm_sub_ps(_mm_mul_ps(rinv11,rinvsq11),krf2));
421
422 /* Update potential sum for this i atom from the interaction with this j atom. */
423 velecsum = _mm_add_ps(velecsum,velec);
424
425 fscal = felec;
426
427 /* Calculate temporary vectorial force */
428 tx = _mm_mul_ps(fscal,dx11);
429 ty = _mm_mul_ps(fscal,dy11);
430 tz = _mm_mul_ps(fscal,dz11);
431
432 /* Update vectorial force */
433 fix1 = _mm_add_ps(fix1,tx);
434 fiy1 = _mm_add_ps(fiy1,ty);
435 fiz1 = _mm_add_ps(fiz1,tz);
436
437 fjx1 = _mm_add_ps(fjx1,tx);
438 fjy1 = _mm_add_ps(fjy1,ty);
439 fjz1 = _mm_add_ps(fjz1,tz);
440
441 /**************************
442 * CALCULATE INTERACTIONS *
443 **************************/
444
445 /* REACTION-FIELD ELECTROSTATICS */
446 velec = _mm_mul_ps(qq12,_mm_sub_ps(_mm_add_ps(rinv12,_mm_mul_ps(krf,rsq12)),crf));
447 felec = _mm_mul_ps(qq12,_mm_sub_ps(_mm_mul_ps(rinv12,rinvsq12),krf2));
448
449 /* Update potential sum for this i atom from the interaction with this j atom. */
450 velecsum = _mm_add_ps(velecsum,velec);
451
452 fscal = felec;
453
454 /* Calculate temporary vectorial force */
455 tx = _mm_mul_ps(fscal,dx12);
456 ty = _mm_mul_ps(fscal,dy12);
457 tz = _mm_mul_ps(fscal,dz12);
458
459 /* Update vectorial force */
460 fix1 = _mm_add_ps(fix1,tx);
461 fiy1 = _mm_add_ps(fiy1,ty);
462 fiz1 = _mm_add_ps(fiz1,tz);
463
464 fjx2 = _mm_add_ps(fjx2,tx);
465 fjy2 = _mm_add_ps(fjy2,ty);
466 fjz2 = _mm_add_ps(fjz2,tz);
467
468 /**************************
469 * CALCULATE INTERACTIONS *
470 **************************/
471
472 /* REACTION-FIELD ELECTROSTATICS */
473 velec = _mm_mul_ps(qq20,_mm_sub_ps(_mm_add_ps(rinv20,_mm_mul_ps(krf,rsq20)),crf));
474 felec = _mm_mul_ps(qq20,_mm_sub_ps(_mm_mul_ps(rinv20,rinvsq20),krf2));
475
476 /* Update potential sum for this i atom from the interaction with this j atom. */
477 velecsum = _mm_add_ps(velecsum,velec);
478
479 fscal = felec;
480
481 /* Calculate temporary vectorial force */
482 tx = _mm_mul_ps(fscal,dx20);
483 ty = _mm_mul_ps(fscal,dy20);
484 tz = _mm_mul_ps(fscal,dz20);
485
486 /* Update vectorial force */
487 fix2 = _mm_add_ps(fix2,tx);
488 fiy2 = _mm_add_ps(fiy2,ty);
489 fiz2 = _mm_add_ps(fiz2,tz);
490
491 fjx0 = _mm_add_ps(fjx0,tx);
492 fjy0 = _mm_add_ps(fjy0,ty);
493 fjz0 = _mm_add_ps(fjz0,tz);
494
495 /**************************
496 * CALCULATE INTERACTIONS *
497 **************************/
498
499 /* REACTION-FIELD ELECTROSTATICS */
500 velec = _mm_mul_ps(qq21,_mm_sub_ps(_mm_add_ps(rinv21,_mm_mul_ps(krf,rsq21)),crf));
501 felec = _mm_mul_ps(qq21,_mm_sub_ps(_mm_mul_ps(rinv21,rinvsq21),krf2));
502
503 /* Update potential sum for this i atom from the interaction with this j atom. */
504 velecsum = _mm_add_ps(velecsum,velec);
505
506 fscal = felec;
507
508 /* Calculate temporary vectorial force */
509 tx = _mm_mul_ps(fscal,dx21);
510 ty = _mm_mul_ps(fscal,dy21);
511 tz = _mm_mul_ps(fscal,dz21);
512
513 /* Update vectorial force */
514 fix2 = _mm_add_ps(fix2,tx);
515 fiy2 = _mm_add_ps(fiy2,ty);
516 fiz2 = _mm_add_ps(fiz2,tz);
517
518 fjx1 = _mm_add_ps(fjx1,tx);
519 fjy1 = _mm_add_ps(fjy1,ty);
520 fjz1 = _mm_add_ps(fjz1,tz);
521
522 /**************************
523 * CALCULATE INTERACTIONS *
524 **************************/
525
526 /* REACTION-FIELD ELECTROSTATICS */
527 velec = _mm_mul_ps(qq22,_mm_sub_ps(_mm_add_ps(rinv22,_mm_mul_ps(krf,rsq22)),crf));
528 felec = _mm_mul_ps(qq22,_mm_sub_ps(_mm_mul_ps(rinv22,rinvsq22),krf2));
529
530 /* Update potential sum for this i atom from the interaction with this j atom. */
531 velecsum = _mm_add_ps(velecsum,velec);
532
533 fscal = felec;
534
535 /* Calculate temporary vectorial force */
536 tx = _mm_mul_ps(fscal,dx22);
537 ty = _mm_mul_ps(fscal,dy22);
538 tz = _mm_mul_ps(fscal,dz22);
539
540 /* Update vectorial force */
541 fix2 = _mm_add_ps(fix2,tx);
542 fiy2 = _mm_add_ps(fiy2,ty);
543 fiz2 = _mm_add_ps(fiz2,tz);
544
545 fjx2 = _mm_add_ps(fjx2,tx);
546 fjy2 = _mm_add_ps(fjy2,ty);
547 fjz2 = _mm_add_ps(fjz2,tz);
548
549 fjptrA = f+j_coord_offsetA;
550 fjptrB = f+j_coord_offsetB;
551 fjptrC = f+j_coord_offsetC;
552 fjptrD = f+j_coord_offsetD;
553
554 gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
555 fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
556
557 /* Inner loop uses 300 flops */
558 }
559
560 if(jidx<j_index_end)
561 {
562
563 /* Get j neighbor index, and coordinate index */
564 jnrlistA = jjnr[jidx];
565 jnrlistB = jjnr[jidx+1];
566 jnrlistC = jjnr[jidx+2];
567 jnrlistD = jjnr[jidx+3];
568 /* Sign of each element will be negative for non-real atoms.
569 * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
570 * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
571 */
572 dummy_mask = gmx_mm_castsi128_ps_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
573 jnrA = (jnrlistA>=0) ? jnrlistA : 0;
574 jnrB = (jnrlistB>=0) ? jnrlistB : 0;
575 jnrC = (jnrlistC>=0) ? jnrlistC : 0;
576 jnrD = (jnrlistD>=0) ? jnrlistD : 0;
577 j_coord_offsetA = DIM3*jnrA;
578 j_coord_offsetB = DIM3*jnrB;
579 j_coord_offsetC = DIM3*jnrC;
580 j_coord_offsetD = DIM3*jnrD;
581
582 /* load j atom coordinates */
583 gmx_mm_load_3rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
584 x+j_coord_offsetC,x+j_coord_offsetD,
585 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
586
587 /* Calculate displacement vector */
588 dx00 = _mm_sub_ps(ix0,jx0);
589 dy00 = _mm_sub_ps(iy0,jy0);
590 dz00 = _mm_sub_ps(iz0,jz0);
591 dx01 = _mm_sub_ps(ix0,jx1);
592 dy01 = _mm_sub_ps(iy0,jy1);
593 dz01 = _mm_sub_ps(iz0,jz1);
594 dx02 = _mm_sub_ps(ix0,jx2);
595 dy02 = _mm_sub_ps(iy0,jy2);
596 dz02 = _mm_sub_ps(iz0,jz2);
597 dx10 = _mm_sub_ps(ix1,jx0);
598 dy10 = _mm_sub_ps(iy1,jy0);
599 dz10 = _mm_sub_ps(iz1,jz0);
600 dx11 = _mm_sub_ps(ix1,jx1);
601 dy11 = _mm_sub_ps(iy1,jy1);
602 dz11 = _mm_sub_ps(iz1,jz1);
603 dx12 = _mm_sub_ps(ix1,jx2);
604 dy12 = _mm_sub_ps(iy1,jy2);
605 dz12 = _mm_sub_ps(iz1,jz2);
606 dx20 = _mm_sub_ps(ix2,jx0);
607 dy20 = _mm_sub_ps(iy2,jy0);
608 dz20 = _mm_sub_ps(iz2,jz0);
609 dx21 = _mm_sub_ps(ix2,jx1);
610 dy21 = _mm_sub_ps(iy2,jy1);
611 dz21 = _mm_sub_ps(iz2,jz1);
612 dx22 = _mm_sub_ps(ix2,jx2);
613 dy22 = _mm_sub_ps(iy2,jy2);
614 dz22 = _mm_sub_ps(iz2,jz2);
615
616 /* Calculate squared distance and things based on it */
617 rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
618 rsq01 = gmx_mm_calc_rsq_ps(dx01,dy01,dz01);
619 rsq02 = gmx_mm_calc_rsq_ps(dx02,dy02,dz02);
620 rsq10 = gmx_mm_calc_rsq_ps(dx10,dy10,dz10);
621 rsq11 = gmx_mm_calc_rsq_ps(dx11,dy11,dz11);
622 rsq12 = gmx_mm_calc_rsq_ps(dx12,dy12,dz12);
623 rsq20 = gmx_mm_calc_rsq_ps(dx20,dy20,dz20);
624 rsq21 = gmx_mm_calc_rsq_ps(dx21,dy21,dz21);
625 rsq22 = gmx_mm_calc_rsq_ps(dx22,dy22,dz22);
626
627 rinv00 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq00);
628 rinv01 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq01);
629 rinv02 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq02);
630 rinv10 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq10);
631 rinv11 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq11);
632 rinv12 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq12);
633 rinv20 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq20);
634 rinv21 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq21);
635 rinv22 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq22);
636
637 rinvsq00 = _mm_mul_ps(rinv00,rinv00);
638 rinvsq01 = _mm_mul_ps(rinv01,rinv01);
639 rinvsq02 = _mm_mul_ps(rinv02,rinv02);
640 rinvsq10 = _mm_mul_ps(rinv10,rinv10);
641 rinvsq11 = _mm_mul_ps(rinv11,rinv11);
642 rinvsq12 = _mm_mul_ps(rinv12,rinv12);
643 rinvsq20 = _mm_mul_ps(rinv20,rinv20);
644 rinvsq21 = _mm_mul_ps(rinv21,rinv21);
645 rinvsq22 = _mm_mul_ps(rinv22,rinv22);
646
647 fjx0 = _mm_setzero_ps();
648 fjy0 = _mm_setzero_ps();
649 fjz0 = _mm_setzero_ps();
650 fjx1 = _mm_setzero_ps();
651 fjy1 = _mm_setzero_ps();
652 fjz1 = _mm_setzero_ps();
653 fjx2 = _mm_setzero_ps();
654 fjy2 = _mm_setzero_ps();
655 fjz2 = _mm_setzero_ps();
656
657 /**************************
658 * CALCULATE INTERACTIONS *
659 **************************/
660
661 /* REACTION-FIELD ELECTROSTATICS */
662 velec = _mm_mul_ps(qq00,_mm_sub_ps(_mm_add_ps(rinv00,_mm_mul_ps(krf,rsq00)),crf));
663 felec = _mm_mul_ps(qq00,_mm_sub_ps(_mm_mul_ps(rinv00,rinvsq00),krf2));
664
665 /* LENNARD-JONES DISPERSION/REPULSION */
666
667 rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
668 vvdw6 = _mm_mul_ps(c6_00,rinvsix);
669 vvdw12 = _mm_mul_ps(c12_00,_mm_mul_ps(rinvsix,rinvsix));
670 vvdw = _mm_sub_ps( _mm_mul_ps(vvdw12,one_twelfth) , _mm_mul_ps(vvdw6,one_sixth) );
671 fvdw = _mm_mul_ps(_mm_sub_ps(vvdw12,vvdw6),rinvsq00);
672
673 /* Update potential sum for this i atom from the interaction with this j atom. */
674 velec = _mm_andnot_ps(dummy_mask,velec);
675 velecsum = _mm_add_ps(velecsum,velec);
676 vvdw = _mm_andnot_ps(dummy_mask,vvdw);
677 vvdwsum = _mm_add_ps(vvdwsum,vvdw);
678
679 fscal = _mm_add_ps(felec,fvdw);
680
681 fscal = _mm_andnot_ps(dummy_mask,fscal);
682
683 /* Calculate temporary vectorial force */
684 tx = _mm_mul_ps(fscal,dx00);
685 ty = _mm_mul_ps(fscal,dy00);
686 tz = _mm_mul_ps(fscal,dz00);
687
688 /* Update vectorial force */
689 fix0 = _mm_add_ps(fix0,tx);
690 fiy0 = _mm_add_ps(fiy0,ty);
691 fiz0 = _mm_add_ps(fiz0,tz);
692
693 fjx0 = _mm_add_ps(fjx0,tx);
694 fjy0 = _mm_add_ps(fjy0,ty);
695 fjz0 = _mm_add_ps(fjz0,tz);
696
697 /**************************
698 * CALCULATE INTERACTIONS *
699 **************************/
700
701 /* REACTION-FIELD ELECTROSTATICS */
702 velec = _mm_mul_ps(qq01,_mm_sub_ps(_mm_add_ps(rinv01,_mm_mul_ps(krf,rsq01)),crf));
703 felec = _mm_mul_ps(qq01,_mm_sub_ps(_mm_mul_ps(rinv01,rinvsq01),krf2));
704
705 /* Update potential sum for this i atom from the interaction with this j atom. */
706 velec = _mm_andnot_ps(dummy_mask,velec);
707 velecsum = _mm_add_ps(velecsum,velec);
708
709 fscal = felec;
710
711 fscal = _mm_andnot_ps(dummy_mask,fscal);
712
713 /* Calculate temporary vectorial force */
714 tx = _mm_mul_ps(fscal,dx01);
715 ty = _mm_mul_ps(fscal,dy01);
716 tz = _mm_mul_ps(fscal,dz01);
717
718 /* Update vectorial force */
719 fix0 = _mm_add_ps(fix0,tx);
720 fiy0 = _mm_add_ps(fiy0,ty);
721 fiz0 = _mm_add_ps(fiz0,tz);
722
723 fjx1 = _mm_add_ps(fjx1,tx);
724 fjy1 = _mm_add_ps(fjy1,ty);
725 fjz1 = _mm_add_ps(fjz1,tz);
726
727 /**************************
728 * CALCULATE INTERACTIONS *
729 **************************/
730
731 /* REACTION-FIELD ELECTROSTATICS */
732 velec = _mm_mul_ps(qq02,_mm_sub_ps(_mm_add_ps(rinv02,_mm_mul_ps(krf,rsq02)),crf));
733 felec = _mm_mul_ps(qq02,_mm_sub_ps(_mm_mul_ps(rinv02,rinvsq02),krf2));
734
735 /* Update potential sum for this i atom from the interaction with this j atom. */
736 velec = _mm_andnot_ps(dummy_mask,velec);
737 velecsum = _mm_add_ps(velecsum,velec);
738
739 fscal = felec;
740
741 fscal = _mm_andnot_ps(dummy_mask,fscal);
742
743 /* Calculate temporary vectorial force */
744 tx = _mm_mul_ps(fscal,dx02);
745 ty = _mm_mul_ps(fscal,dy02);
746 tz = _mm_mul_ps(fscal,dz02);
747
748 /* Update vectorial force */
749 fix0 = _mm_add_ps(fix0,tx);
750 fiy0 = _mm_add_ps(fiy0,ty);
751 fiz0 = _mm_add_ps(fiz0,tz);
752
753 fjx2 = _mm_add_ps(fjx2,tx);
754 fjy2 = _mm_add_ps(fjy2,ty);
755 fjz2 = _mm_add_ps(fjz2,tz);
756
757 /**************************
758 * CALCULATE INTERACTIONS *
759 **************************/
760
761 /* REACTION-FIELD ELECTROSTATICS */
762 velec = _mm_mul_ps(qq10,_mm_sub_ps(_mm_add_ps(rinv10,_mm_mul_ps(krf,rsq10)),crf));
763 felec = _mm_mul_ps(qq10,_mm_sub_ps(_mm_mul_ps(rinv10,rinvsq10),krf2));
764
765 /* Update potential sum for this i atom from the interaction with this j atom. */
766 velec = _mm_andnot_ps(dummy_mask,velec);
767 velecsum = _mm_add_ps(velecsum,velec);
768
769 fscal = felec;
770
771 fscal = _mm_andnot_ps(dummy_mask,fscal);
772
773 /* Calculate temporary vectorial force */
774 tx = _mm_mul_ps(fscal,dx10);
775 ty = _mm_mul_ps(fscal,dy10);
776 tz = _mm_mul_ps(fscal,dz10);
777
778 /* Update vectorial force */
779 fix1 = _mm_add_ps(fix1,tx);
780 fiy1 = _mm_add_ps(fiy1,ty);
781 fiz1 = _mm_add_ps(fiz1,tz);
782
783 fjx0 = _mm_add_ps(fjx0,tx);
784 fjy0 = _mm_add_ps(fjy0,ty);
785 fjz0 = _mm_add_ps(fjz0,tz);
786
787 /**************************
788 * CALCULATE INTERACTIONS *
789 **************************/
790
791 /* REACTION-FIELD ELECTROSTATICS */
792 velec = _mm_mul_ps(qq11,_mm_sub_ps(_mm_add_ps(rinv11,_mm_mul_ps(krf,rsq11)),crf));
793 felec = _mm_mul_ps(qq11,_mm_sub_ps(_mm_mul_ps(rinv11,rinvsq11),krf2));
794
795 /* Update potential sum for this i atom from the interaction with this j atom. */
796 velec = _mm_andnot_ps(dummy_mask,velec);
797 velecsum = _mm_add_ps(velecsum,velec);
798
799 fscal = felec;
800
801 fscal = _mm_andnot_ps(dummy_mask,fscal);
802
803 /* Calculate temporary vectorial force */
804 tx = _mm_mul_ps(fscal,dx11);
805 ty = _mm_mul_ps(fscal,dy11);
806 tz = _mm_mul_ps(fscal,dz11);
807
808 /* Update vectorial force */
809 fix1 = _mm_add_ps(fix1,tx);
810 fiy1 = _mm_add_ps(fiy1,ty);
811 fiz1 = _mm_add_ps(fiz1,tz);
812
813 fjx1 = _mm_add_ps(fjx1,tx);
814 fjy1 = _mm_add_ps(fjy1,ty);
815 fjz1 = _mm_add_ps(fjz1,tz);
816
817 /**************************
818 * CALCULATE INTERACTIONS *
819 **************************/
820
821 /* REACTION-FIELD ELECTROSTATICS */
822 velec = _mm_mul_ps(qq12,_mm_sub_ps(_mm_add_ps(rinv12,_mm_mul_ps(krf,rsq12)),crf));
823 felec = _mm_mul_ps(qq12,_mm_sub_ps(_mm_mul_ps(rinv12,rinvsq12),krf2));
824
825 /* Update potential sum for this i atom from the interaction with this j atom. */
826 velec = _mm_andnot_ps(dummy_mask,velec);
827 velecsum = _mm_add_ps(velecsum,velec);
828
829 fscal = felec;
830
831 fscal = _mm_andnot_ps(dummy_mask,fscal);
832
833 /* Calculate temporary vectorial force */
834 tx = _mm_mul_ps(fscal,dx12);
835 ty = _mm_mul_ps(fscal,dy12);
836 tz = _mm_mul_ps(fscal,dz12);
837
838 /* Update vectorial force */
839 fix1 = _mm_add_ps(fix1,tx);
840 fiy1 = _mm_add_ps(fiy1,ty);
841 fiz1 = _mm_add_ps(fiz1,tz);
842
843 fjx2 = _mm_add_ps(fjx2,tx);
844 fjy2 = _mm_add_ps(fjy2,ty);
845 fjz2 = _mm_add_ps(fjz2,tz);
846
847 /**************************
848 * CALCULATE INTERACTIONS *
849 **************************/
850
851 /* REACTION-FIELD ELECTROSTATICS */
852 velec = _mm_mul_ps(qq20,_mm_sub_ps(_mm_add_ps(rinv20,_mm_mul_ps(krf,rsq20)),crf));
853 felec = _mm_mul_ps(qq20,_mm_sub_ps(_mm_mul_ps(rinv20,rinvsq20),krf2));
854
855 /* Update potential sum for this i atom from the interaction with this j atom. */
856 velec = _mm_andnot_ps(dummy_mask,velec);
857 velecsum = _mm_add_ps(velecsum,velec);
858
859 fscal = felec;
860
861 fscal = _mm_andnot_ps(dummy_mask,fscal);
862
863 /* Calculate temporary vectorial force */
864 tx = _mm_mul_ps(fscal,dx20);
865 ty = _mm_mul_ps(fscal,dy20);
866 tz = _mm_mul_ps(fscal,dz20);
867
868 /* Update vectorial force */
869 fix2 = _mm_add_ps(fix2,tx);
870 fiy2 = _mm_add_ps(fiy2,ty);
871 fiz2 = _mm_add_ps(fiz2,tz);
872
873 fjx0 = _mm_add_ps(fjx0,tx);
874 fjy0 = _mm_add_ps(fjy0,ty);
875 fjz0 = _mm_add_ps(fjz0,tz);
876
877 /**************************
878 * CALCULATE INTERACTIONS *
879 **************************/
880
881 /* REACTION-FIELD ELECTROSTATICS */
882 velec = _mm_mul_ps(qq21,_mm_sub_ps(_mm_add_ps(rinv21,_mm_mul_ps(krf,rsq21)),crf));
883 felec = _mm_mul_ps(qq21,_mm_sub_ps(_mm_mul_ps(rinv21,rinvsq21),krf2));
884
885 /* Update potential sum for this i atom from the interaction with this j atom. */
886 velec = _mm_andnot_ps(dummy_mask,velec);
887 velecsum = _mm_add_ps(velecsum,velec);
888
889 fscal = felec;
890
891 fscal = _mm_andnot_ps(dummy_mask,fscal);
892
893 /* Calculate temporary vectorial force */
894 tx = _mm_mul_ps(fscal,dx21);
895 ty = _mm_mul_ps(fscal,dy21);
896 tz = _mm_mul_ps(fscal,dz21);
897
898 /* Update vectorial force */
899 fix2 = _mm_add_ps(fix2,tx);
900 fiy2 = _mm_add_ps(fiy2,ty);
901 fiz2 = _mm_add_ps(fiz2,tz);
902
903 fjx1 = _mm_add_ps(fjx1,tx);
904 fjy1 = _mm_add_ps(fjy1,ty);
905 fjz1 = _mm_add_ps(fjz1,tz);
906
907 /**************************
908 * CALCULATE INTERACTIONS *
909 **************************/
910
911 /* REACTION-FIELD ELECTROSTATICS */
912 velec = _mm_mul_ps(qq22,_mm_sub_ps(_mm_add_ps(rinv22,_mm_mul_ps(krf,rsq22)),crf));
913 felec = _mm_mul_ps(qq22,_mm_sub_ps(_mm_mul_ps(rinv22,rinvsq22),krf2));
914
915 /* Update potential sum for this i atom from the interaction with this j atom. */
916 velec = _mm_andnot_ps(dummy_mask,velec);
917 velecsum = _mm_add_ps(velecsum,velec);
918
919 fscal = felec;
920
921 fscal = _mm_andnot_ps(dummy_mask,fscal);
922
923 /* Calculate temporary vectorial force */
924 tx = _mm_mul_ps(fscal,dx22);
925 ty = _mm_mul_ps(fscal,dy22);
926 tz = _mm_mul_ps(fscal,dz22);
927
928 /* Update vectorial force */
929 fix2 = _mm_add_ps(fix2,tx);
930 fiy2 = _mm_add_ps(fiy2,ty);
931 fiz2 = _mm_add_ps(fiz2,tz);
932
933 fjx2 = _mm_add_ps(fjx2,tx);
934 fjy2 = _mm_add_ps(fjy2,ty);
935 fjz2 = _mm_add_ps(fjz2,tz);
936
937 fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
938 fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
939 fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
940 fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
941
942 gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
943 fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
944
945 /* Inner loop uses 300 flops */
946 }
947
948 /* End of innermost loop */
949
950 gmx_mm_update_iforce_3atom_swizzle_ps(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
951 f+i_coord_offset,fshift+i_shift_offset);
952
953 ggid = gid[iidx];
954 /* Update potential energies */
955 gmx_mm_update_1pot_ps(velecsum,kernel_data->energygrp_elec+ggid);
956 gmx_mm_update_1pot_ps(vvdwsum,kernel_data->energygrp_vdw+ggid);
957
958 /* Increment number of inner iterations */
959 inneriter += j_index_end - j_index_start;
960
961 /* Outer loop uses 20 flops */
962 }
963
964 /* Increment number of outer iterations */
965 outeriter += nri;
966
967 /* Update outer/inner flops */
968
969 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*300)(nrnb)->n[eNR_NBKERNEL_ELEC_VDW_W3W3_VF] += outeriter*20 +
inneriter*300
;
970}
971/*
972 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_single
973 * Electrostatics interaction: ReactionField
974 * VdW interaction: LennardJones
975 * Geometry: Water3-Water3
976 * Calculate force/pot: Force
977 */
978void
979nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sse4_1_single
980 (t_nblist * gmx_restrict nlist,
981 rvec * gmx_restrict xx,
982 rvec * gmx_restrict ff,
983 t_forcerec * gmx_restrict fr,
984 t_mdatoms * gmx_restrict mdatoms,
985 nb_kernel_data_t gmx_unused__attribute__ ((unused)) * gmx_restrict kernel_data,
986 t_nrnb * gmx_restrict nrnb)
987{
988 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
989 * just 0 for non-waters.
990 * Suffixes A,B,C,D refer to j loop unrolling done with SSE, e.g. for the four different
991 * jnr indices corresponding to data put in the four positions in the SIMD register.
992 */
993 int i_shift_offset,i_coord_offset,outeriter,inneriter;
994 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
995 int jnrA,jnrB,jnrC,jnrD;
996 int jnrlistA,jnrlistB,jnrlistC,jnrlistD;
997 int j_coord_offsetA,j_coord_offsetB,j_coord_offsetC,j_coord_offsetD;
998 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
999 real rcutoff_scalar;
1000 real *shiftvec,*fshift,*x,*f;
1001 real *fjptrA,*fjptrB,*fjptrC,*fjptrD;
1002 real scratch[4*DIM3];
1003 __m128 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1004 int vdwioffset0;
1005 __m128 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1006 int vdwioffset1;
1007 __m128 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1008 int vdwioffset2;
1009 __m128 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1010 int vdwjidx0A,vdwjidx0B,vdwjidx0C,vdwjidx0D;
1011 __m128 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1012 int vdwjidx1A,vdwjidx1B,vdwjidx1C,vdwjidx1D;
1013 __m128 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1014 int vdwjidx2A,vdwjidx2B,vdwjidx2C,vdwjidx2D;
1015 __m128 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1016 __m128 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1017 __m128 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1018 __m128 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1019 __m128 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1020 __m128 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1021 __m128 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1022 __m128 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1023 __m128 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1024 __m128 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1025 __m128 velec,felec,velecsum,facel,crf,krf,krf2;
1026 real *charge;
1027 int nvdwtype;
1028 __m128 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1029 int *vdwtype;
1030 real *vdwparam;
1031 __m128 one_sixth = _mm_set1_ps(1.0/6.0);
1032 __m128 one_twelfth = _mm_set1_ps(1.0/12.0);
1033 __m128 dummy_mask,cutoff_mask;
1034 __m128 signbit = _mm_castsi128_ps( _mm_set1_epi32(0x80000000) );
1035 __m128 one = _mm_set1_ps(1.0);
1036 __m128 two = _mm_set1_ps(2.0);
1037 x = xx[0];
1038 f = ff[0];
1039
1040 nri = nlist->nri;
1041 iinr = nlist->iinr;
1042 jindex = nlist->jindex;
1043 jjnr = nlist->jjnr;
1044 shiftidx = nlist->shift;
1045 gid = nlist->gid;
1046 shiftvec = fr->shift_vec[0];
1047 fshift = fr->fshift[0];
1048 facel = _mm_set1_ps(fr->epsfac);
1049 charge = mdatoms->chargeA;
1050 krf = _mm_set1_ps(fr->ic->k_rf);
1051 krf2 = _mm_set1_ps(fr->ic->k_rf*2.0);
1052 crf = _mm_set1_ps(fr->ic->c_rf);
1053 nvdwtype = fr->ntype;
1054 vdwparam = fr->nbfp;
1055 vdwtype = mdatoms->typeA;
1056
1057 /* Setup water-specific parameters */
1058 inr = nlist->iinr[0];
1059 iq0 = _mm_mul_ps(facel,_mm_set1_ps(charge[inr+0]));
1060 iq1 = _mm_mul_ps(facel,_mm_set1_ps(charge[inr+1]));
1061 iq2 = _mm_mul_ps(facel,_mm_set1_ps(charge[inr+2]));
1062 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1063
1064 jq0 = _mm_set1_ps(charge[inr+0]);
1065 jq1 = _mm_set1_ps(charge[inr+1]);
1066 jq2 = _mm_set1_ps(charge[inr+2]);
1067 vdwjidx0A = 2*vdwtype[inr+0];
1068 qq00 = _mm_mul_ps(iq0,jq0);
1069 c6_00 = _mm_set1_ps(vdwparam[vdwioffset0+vdwjidx0A]);
1070 c12_00 = _mm_set1_ps(vdwparam[vdwioffset0+vdwjidx0A+1]);
1071 qq01 = _mm_mul_ps(iq0,jq1);
1072 qq02 = _mm_mul_ps(iq0,jq2);
1073 qq10 = _mm_mul_ps(iq1,jq0);
1074 qq11 = _mm_mul_ps(iq1,jq1);
1075 qq12 = _mm_mul_ps(iq1,jq2);
1076 qq20 = _mm_mul_ps(iq2,jq0);
1077 qq21 = _mm_mul_ps(iq2,jq1);
1078 qq22 = _mm_mul_ps(iq2,jq2);
1079
1080 /* Avoid stupid compiler warnings */
1081 jnrA = jnrB = jnrC = jnrD = 0;
Value stored to 'jnrA' is never read
1082 j_coord_offsetA = 0;
1083 j_coord_offsetB = 0;
1084 j_coord_offsetC = 0;
1085 j_coord_offsetD = 0;
1086
1087 outeriter = 0;
1088 inneriter = 0;
1089
1090 for(iidx=0;iidx<4*DIM3;iidx++)
1091 {
1092 scratch[iidx] = 0.0;
1093 }
1094
1095 /* Start outer loop over neighborlists */
1096 for(iidx=0; iidx<nri; iidx++)
1097 {
1098 /* Load shift vector for this list */
1099 i_shift_offset = DIM3*shiftidx[iidx];
1100
1101 /* Load limits for loop over neighbors */
1102 j_index_start = jindex[iidx];
1103 j_index_end = jindex[iidx+1];
1104
1105 /* Get outer coordinate index */
1106 inr = iinr[iidx];
1107 i_coord_offset = DIM3*inr;
1108
1109 /* Load i particle coords and add shift vector */
1110 gmx_mm_load_shift_and_3rvec_broadcast_ps(shiftvec+i_shift_offset,x+i_coord_offset,
1111 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1112
1113 fix0 = _mm_setzero_ps();
1114 fiy0 = _mm_setzero_ps();
1115 fiz0 = _mm_setzero_ps();
1116 fix1 = _mm_setzero_ps();
1117 fiy1 = _mm_setzero_ps();
1118 fiz1 = _mm_setzero_ps();
1119 fix2 = _mm_setzero_ps();
1120 fiy2 = _mm_setzero_ps();
1121 fiz2 = _mm_setzero_ps();
1122
1123 /* Start inner kernel loop */
1124 for(jidx=j_index_start; jidx<j_index_end && jjnr[jidx+3]>=0; jidx+=4)
1125 {
1126
1127 /* Get j neighbor index, and coordinate index */
1128 jnrA = jjnr[jidx];
1129 jnrB = jjnr[jidx+1];
1130 jnrC = jjnr[jidx+2];
1131 jnrD = jjnr[jidx+3];
1132 j_coord_offsetA = DIM3*jnrA;
1133 j_coord_offsetB = DIM3*jnrB;
1134 j_coord_offsetC = DIM3*jnrC;
1135 j_coord_offsetD = DIM3*jnrD;
1136
1137 /* load j atom coordinates */
1138 gmx_mm_load_3rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
1139 x+j_coord_offsetC,x+j_coord_offsetD,
1140 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1141
1142 /* Calculate displacement vector */
1143 dx00 = _mm_sub_ps(ix0,jx0);
1144 dy00 = _mm_sub_ps(iy0,jy0);
1145 dz00 = _mm_sub_ps(iz0,jz0);
1146 dx01 = _mm_sub_ps(ix0,jx1);
1147 dy01 = _mm_sub_ps(iy0,jy1);
1148 dz01 = _mm_sub_ps(iz0,jz1);
1149 dx02 = _mm_sub_ps(ix0,jx2);
1150 dy02 = _mm_sub_ps(iy0,jy2);
1151 dz02 = _mm_sub_ps(iz0,jz2);
1152 dx10 = _mm_sub_ps(ix1,jx0);
1153 dy10 = _mm_sub_ps(iy1,jy0);
1154 dz10 = _mm_sub_ps(iz1,jz0);
1155 dx11 = _mm_sub_ps(ix1,jx1);
1156 dy11 = _mm_sub_ps(iy1,jy1);
1157 dz11 = _mm_sub_ps(iz1,jz1);
1158 dx12 = _mm_sub_ps(ix1,jx2);
1159 dy12 = _mm_sub_ps(iy1,jy2);
1160 dz12 = _mm_sub_ps(iz1,jz2);
1161 dx20 = _mm_sub_ps(ix2,jx0);
1162 dy20 = _mm_sub_ps(iy2,jy0);
1163 dz20 = _mm_sub_ps(iz2,jz0);
1164 dx21 = _mm_sub_ps(ix2,jx1);
1165 dy21 = _mm_sub_ps(iy2,jy1);
1166 dz21 = _mm_sub_ps(iz2,jz1);
1167 dx22 = _mm_sub_ps(ix2,jx2);
1168 dy22 = _mm_sub_ps(iy2,jy2);
1169 dz22 = _mm_sub_ps(iz2,jz2);
1170
1171 /* Calculate squared distance and things based on it */
1172 rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
1173 rsq01 = gmx_mm_calc_rsq_ps(dx01,dy01,dz01);
1174 rsq02 = gmx_mm_calc_rsq_ps(dx02,dy02,dz02);
1175 rsq10 = gmx_mm_calc_rsq_ps(dx10,dy10,dz10);
1176 rsq11 = gmx_mm_calc_rsq_ps(dx11,dy11,dz11);
1177 rsq12 = gmx_mm_calc_rsq_ps(dx12,dy12,dz12);
1178 rsq20 = gmx_mm_calc_rsq_ps(dx20,dy20,dz20);
1179 rsq21 = gmx_mm_calc_rsq_ps(dx21,dy21,dz21);
1180 rsq22 = gmx_mm_calc_rsq_ps(dx22,dy22,dz22);
1181
1182 rinv00 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq00);
1183 rinv01 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq01);
1184 rinv02 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq02);
1185 rinv10 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq10);
1186 rinv11 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq11);
1187 rinv12 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq12);
1188 rinv20 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq20);
1189 rinv21 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq21);
1190 rinv22 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq22);
1191
1192 rinvsq00 = _mm_mul_ps(rinv00,rinv00);
1193 rinvsq01 = _mm_mul_ps(rinv01,rinv01);
1194 rinvsq02 = _mm_mul_ps(rinv02,rinv02);
1195 rinvsq10 = _mm_mul_ps(rinv10,rinv10);
1196 rinvsq11 = _mm_mul_ps(rinv11,rinv11);
1197 rinvsq12 = _mm_mul_ps(rinv12,rinv12);
1198 rinvsq20 = _mm_mul_ps(rinv20,rinv20);
1199 rinvsq21 = _mm_mul_ps(rinv21,rinv21);
1200 rinvsq22 = _mm_mul_ps(rinv22,rinv22);
1201
1202 fjx0 = _mm_setzero_ps();
1203 fjy0 = _mm_setzero_ps();
1204 fjz0 = _mm_setzero_ps();
1205 fjx1 = _mm_setzero_ps();
1206 fjy1 = _mm_setzero_ps();
1207 fjz1 = _mm_setzero_ps();
1208 fjx2 = _mm_setzero_ps();
1209 fjy2 = _mm_setzero_ps();
1210 fjz2 = _mm_setzero_ps();
1211
1212 /**************************
1213 * CALCULATE INTERACTIONS *
1214 **************************/
1215
1216 /* REACTION-FIELD ELECTROSTATICS */
1217 felec = _mm_mul_ps(qq00,_mm_sub_ps(_mm_mul_ps(rinv00,rinvsq00),krf2));
1218
1219 /* LENNARD-JONES DISPERSION/REPULSION */
1220
1221 rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
1222 fvdw = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00));
1223
1224 fscal = _mm_add_ps(felec,fvdw);
1225
1226 /* Calculate temporary vectorial force */
1227 tx = _mm_mul_ps(fscal,dx00);
1228 ty = _mm_mul_ps(fscal,dy00);
1229 tz = _mm_mul_ps(fscal,dz00);
1230
1231 /* Update vectorial force */
1232 fix0 = _mm_add_ps(fix0,tx);
1233 fiy0 = _mm_add_ps(fiy0,ty);
1234 fiz0 = _mm_add_ps(fiz0,tz);
1235
1236 fjx0 = _mm_add_ps(fjx0,tx);
1237 fjy0 = _mm_add_ps(fjy0,ty);
1238 fjz0 = _mm_add_ps(fjz0,tz);
1239
1240 /**************************
1241 * CALCULATE INTERACTIONS *
1242 **************************/
1243
1244 /* REACTION-FIELD ELECTROSTATICS */
1245 felec = _mm_mul_ps(qq01,_mm_sub_ps(_mm_mul_ps(rinv01,rinvsq01),krf2));
1246
1247 fscal = felec;
1248
1249 /* Calculate temporary vectorial force */
1250 tx = _mm_mul_ps(fscal,dx01);
1251 ty = _mm_mul_ps(fscal,dy01);
1252 tz = _mm_mul_ps(fscal,dz01);
1253
1254 /* Update vectorial force */
1255 fix0 = _mm_add_ps(fix0,tx);
1256 fiy0 = _mm_add_ps(fiy0,ty);
1257 fiz0 = _mm_add_ps(fiz0,tz);
1258
1259 fjx1 = _mm_add_ps(fjx1,tx);
1260 fjy1 = _mm_add_ps(fjy1,ty);
1261 fjz1 = _mm_add_ps(fjz1,tz);
1262
1263 /**************************
1264 * CALCULATE INTERACTIONS *
1265 **************************/
1266
1267 /* REACTION-FIELD ELECTROSTATICS */
1268 felec = _mm_mul_ps(qq02,_mm_sub_ps(_mm_mul_ps(rinv02,rinvsq02),krf2));
1269
1270 fscal = felec;
1271
1272 /* Calculate temporary vectorial force */
1273 tx = _mm_mul_ps(fscal,dx02);
1274 ty = _mm_mul_ps(fscal,dy02);
1275 tz = _mm_mul_ps(fscal,dz02);
1276
1277 /* Update vectorial force */
1278 fix0 = _mm_add_ps(fix0,tx);
1279 fiy0 = _mm_add_ps(fiy0,ty);
1280 fiz0 = _mm_add_ps(fiz0,tz);
1281
1282 fjx2 = _mm_add_ps(fjx2,tx);
1283 fjy2 = _mm_add_ps(fjy2,ty);
1284 fjz2 = _mm_add_ps(fjz2,tz);
1285
1286 /**************************
1287 * CALCULATE INTERACTIONS *
1288 **************************/
1289
1290 /* REACTION-FIELD ELECTROSTATICS */
1291 felec = _mm_mul_ps(qq10,_mm_sub_ps(_mm_mul_ps(rinv10,rinvsq10),krf2));
1292
1293 fscal = felec;
1294
1295 /* Calculate temporary vectorial force */
1296 tx = _mm_mul_ps(fscal,dx10);
1297 ty = _mm_mul_ps(fscal,dy10);
1298 tz = _mm_mul_ps(fscal,dz10);
1299
1300 /* Update vectorial force */
1301 fix1 = _mm_add_ps(fix1,tx);
1302 fiy1 = _mm_add_ps(fiy1,ty);
1303 fiz1 = _mm_add_ps(fiz1,tz);
1304
1305 fjx0 = _mm_add_ps(fjx0,tx);
1306 fjy0 = _mm_add_ps(fjy0,ty);
1307 fjz0 = _mm_add_ps(fjz0,tz);
1308
1309 /**************************
1310 * CALCULATE INTERACTIONS *
1311 **************************/
1312
1313 /* REACTION-FIELD ELECTROSTATICS */
1314 felec = _mm_mul_ps(qq11,_mm_sub_ps(_mm_mul_ps(rinv11,rinvsq11),krf2));
1315
1316 fscal = felec;
1317
1318 /* Calculate temporary vectorial force */
1319 tx = _mm_mul_ps(fscal,dx11);
1320 ty = _mm_mul_ps(fscal,dy11);
1321 tz = _mm_mul_ps(fscal,dz11);
1322
1323 /* Update vectorial force */
1324 fix1 = _mm_add_ps(fix1,tx);
1325 fiy1 = _mm_add_ps(fiy1,ty);
1326 fiz1 = _mm_add_ps(fiz1,tz);
1327
1328 fjx1 = _mm_add_ps(fjx1,tx);
1329 fjy1 = _mm_add_ps(fjy1,ty);
1330 fjz1 = _mm_add_ps(fjz1,tz);
1331
1332 /**************************
1333 * CALCULATE INTERACTIONS *
1334 **************************/
1335
1336 /* REACTION-FIELD ELECTROSTATICS */
1337 felec = _mm_mul_ps(qq12,_mm_sub_ps(_mm_mul_ps(rinv12,rinvsq12),krf2));
1338
1339 fscal = felec;
1340
1341 /* Calculate temporary vectorial force */
1342 tx = _mm_mul_ps(fscal,dx12);
1343 ty = _mm_mul_ps(fscal,dy12);
1344 tz = _mm_mul_ps(fscal,dz12);
1345
1346 /* Update vectorial force */
1347 fix1 = _mm_add_ps(fix1,tx);
1348 fiy1 = _mm_add_ps(fiy1,ty);
1349 fiz1 = _mm_add_ps(fiz1,tz);
1350
1351 fjx2 = _mm_add_ps(fjx2,tx);
1352 fjy2 = _mm_add_ps(fjy2,ty);
1353 fjz2 = _mm_add_ps(fjz2,tz);
1354
1355 /**************************
1356 * CALCULATE INTERACTIONS *
1357 **************************/
1358
1359 /* REACTION-FIELD ELECTROSTATICS */
1360 felec = _mm_mul_ps(qq20,_mm_sub_ps(_mm_mul_ps(rinv20,rinvsq20),krf2));
1361
1362 fscal = felec;
1363
1364 /* Calculate temporary vectorial force */
1365 tx = _mm_mul_ps(fscal,dx20);
1366 ty = _mm_mul_ps(fscal,dy20);
1367 tz = _mm_mul_ps(fscal,dz20);
1368
1369 /* Update vectorial force */
1370 fix2 = _mm_add_ps(fix2,tx);
1371 fiy2 = _mm_add_ps(fiy2,ty);
1372 fiz2 = _mm_add_ps(fiz2,tz);
1373
1374 fjx0 = _mm_add_ps(fjx0,tx);
1375 fjy0 = _mm_add_ps(fjy0,ty);
1376 fjz0 = _mm_add_ps(fjz0,tz);
1377
1378 /**************************
1379 * CALCULATE INTERACTIONS *
1380 **************************/
1381
1382 /* REACTION-FIELD ELECTROSTATICS */
1383 felec = _mm_mul_ps(qq21,_mm_sub_ps(_mm_mul_ps(rinv21,rinvsq21),krf2));
1384
1385 fscal = felec;
1386
1387 /* Calculate temporary vectorial force */
1388 tx = _mm_mul_ps(fscal,dx21);
1389 ty = _mm_mul_ps(fscal,dy21);
1390 tz = _mm_mul_ps(fscal,dz21);
1391
1392 /* Update vectorial force */
1393 fix2 = _mm_add_ps(fix2,tx);
1394 fiy2 = _mm_add_ps(fiy2,ty);
1395 fiz2 = _mm_add_ps(fiz2,tz);
1396
1397 fjx1 = _mm_add_ps(fjx1,tx);
1398 fjy1 = _mm_add_ps(fjy1,ty);
1399 fjz1 = _mm_add_ps(fjz1,tz);
1400
1401 /**************************
1402 * CALCULATE INTERACTIONS *
1403 **************************/
1404
1405 /* REACTION-FIELD ELECTROSTATICS */
1406 felec = _mm_mul_ps(qq22,_mm_sub_ps(_mm_mul_ps(rinv22,rinvsq22),krf2));
1407
1408 fscal = felec;
1409
1410 /* Calculate temporary vectorial force */
1411 tx = _mm_mul_ps(fscal,dx22);
1412 ty = _mm_mul_ps(fscal,dy22);
1413 tz = _mm_mul_ps(fscal,dz22);
1414
1415 /* Update vectorial force */
1416 fix2 = _mm_add_ps(fix2,tx);
1417 fiy2 = _mm_add_ps(fiy2,ty);
1418 fiz2 = _mm_add_ps(fiz2,tz);
1419
1420 fjx2 = _mm_add_ps(fjx2,tx);
1421 fjy2 = _mm_add_ps(fjy2,ty);
1422 fjz2 = _mm_add_ps(fjz2,tz);
1423
1424 fjptrA = f+j_coord_offsetA;
1425 fjptrB = f+j_coord_offsetB;
1426 fjptrC = f+j_coord_offsetC;
1427 fjptrD = f+j_coord_offsetD;
1428
1429 gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
1430 fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1431
1432 /* Inner loop uses 250 flops */
1433 }
1434
1435 if(jidx<j_index_end)
1436 {
1437
1438 /* Get j neighbor index, and coordinate index */
1439 jnrlistA = jjnr[jidx];
1440 jnrlistB = jjnr[jidx+1];
1441 jnrlistC = jjnr[jidx+2];
1442 jnrlistD = jjnr[jidx+3];
1443 /* Sign of each element will be negative for non-real atoms.
1444 * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
1445 * so use it as val = _mm_andnot_ps(mask,val) to clear dummy entries.
1446 */
1447 dummy_mask = gmx_mm_castsi128_ps_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
1448 jnrA = (jnrlistA>=0) ? jnrlistA : 0;
1449 jnrB = (jnrlistB>=0) ? jnrlistB : 0;
1450 jnrC = (jnrlistC>=0) ? jnrlistC : 0;
1451 jnrD = (jnrlistD>=0) ? jnrlistD : 0;
1452 j_coord_offsetA = DIM3*jnrA;
1453 j_coord_offsetB = DIM3*jnrB;
1454 j_coord_offsetC = DIM3*jnrC;
1455 j_coord_offsetD = DIM3*jnrD;
1456
1457 /* load j atom coordinates */
1458 gmx_mm_load_3rvec_4ptr_swizzle_ps(x+j_coord_offsetA,x+j_coord_offsetB,
1459 x+j_coord_offsetC,x+j_coord_offsetD,
1460 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1461
1462 /* Calculate displacement vector */
1463 dx00 = _mm_sub_ps(ix0,jx0);
1464 dy00 = _mm_sub_ps(iy0,jy0);
1465 dz00 = _mm_sub_ps(iz0,jz0);
1466 dx01 = _mm_sub_ps(ix0,jx1);
1467 dy01 = _mm_sub_ps(iy0,jy1);
1468 dz01 = _mm_sub_ps(iz0,jz1);
1469 dx02 = _mm_sub_ps(ix0,jx2);
1470 dy02 = _mm_sub_ps(iy0,jy2);
1471 dz02 = _mm_sub_ps(iz0,jz2);
1472 dx10 = _mm_sub_ps(ix1,jx0);
1473 dy10 = _mm_sub_ps(iy1,jy0);
1474 dz10 = _mm_sub_ps(iz1,jz0);
1475 dx11 = _mm_sub_ps(ix1,jx1);
1476 dy11 = _mm_sub_ps(iy1,jy1);
1477 dz11 = _mm_sub_ps(iz1,jz1);
1478 dx12 = _mm_sub_ps(ix1,jx2);
1479 dy12 = _mm_sub_ps(iy1,jy2);
1480 dz12 = _mm_sub_ps(iz1,jz2);
1481 dx20 = _mm_sub_ps(ix2,jx0);
1482 dy20 = _mm_sub_ps(iy2,jy0);
1483 dz20 = _mm_sub_ps(iz2,jz0);
1484 dx21 = _mm_sub_ps(ix2,jx1);
1485 dy21 = _mm_sub_ps(iy2,jy1);
1486 dz21 = _mm_sub_ps(iz2,jz1);
1487 dx22 = _mm_sub_ps(ix2,jx2);
1488 dy22 = _mm_sub_ps(iy2,jy2);
1489 dz22 = _mm_sub_ps(iz2,jz2);
1490
1491 /* Calculate squared distance and things based on it */
1492 rsq00 = gmx_mm_calc_rsq_ps(dx00,dy00,dz00);
1493 rsq01 = gmx_mm_calc_rsq_ps(dx01,dy01,dz01);
1494 rsq02 = gmx_mm_calc_rsq_ps(dx02,dy02,dz02);
1495 rsq10 = gmx_mm_calc_rsq_ps(dx10,dy10,dz10);
1496 rsq11 = gmx_mm_calc_rsq_ps(dx11,dy11,dz11);
1497 rsq12 = gmx_mm_calc_rsq_ps(dx12,dy12,dz12);
1498 rsq20 = gmx_mm_calc_rsq_ps(dx20,dy20,dz20);
1499 rsq21 = gmx_mm_calc_rsq_ps(dx21,dy21,dz21);
1500 rsq22 = gmx_mm_calc_rsq_ps(dx22,dy22,dz22);
1501
1502 rinv00 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq00);
1503 rinv01 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq01);
1504 rinv02 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq02);
1505 rinv10 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq10);
1506 rinv11 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq11);
1507 rinv12 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq12);
1508 rinv20 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq20);
1509 rinv21 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq21);
1510 rinv22 = gmx_mm_invsqrt_psgmx_simd_invsqrt_f(rsq22);
1511
1512 rinvsq00 = _mm_mul_ps(rinv00,rinv00);
1513 rinvsq01 = _mm_mul_ps(rinv01,rinv01);
1514 rinvsq02 = _mm_mul_ps(rinv02,rinv02);
1515 rinvsq10 = _mm_mul_ps(rinv10,rinv10);
1516 rinvsq11 = _mm_mul_ps(rinv11,rinv11);
1517 rinvsq12 = _mm_mul_ps(rinv12,rinv12);
1518 rinvsq20 = _mm_mul_ps(rinv20,rinv20);
1519 rinvsq21 = _mm_mul_ps(rinv21,rinv21);
1520 rinvsq22 = _mm_mul_ps(rinv22,rinv22);
1521
1522 fjx0 = _mm_setzero_ps();
1523 fjy0 = _mm_setzero_ps();
1524 fjz0 = _mm_setzero_ps();
1525 fjx1 = _mm_setzero_ps();
1526 fjy1 = _mm_setzero_ps();
1527 fjz1 = _mm_setzero_ps();
1528 fjx2 = _mm_setzero_ps();
1529 fjy2 = _mm_setzero_ps();
1530 fjz2 = _mm_setzero_ps();
1531
1532 /**************************
1533 * CALCULATE INTERACTIONS *
1534 **************************/
1535
1536 /* REACTION-FIELD ELECTROSTATICS */
1537 felec = _mm_mul_ps(qq00,_mm_sub_ps(_mm_mul_ps(rinv00,rinvsq00),krf2));
1538
1539 /* LENNARD-JONES DISPERSION/REPULSION */
1540
1541 rinvsix = _mm_mul_ps(_mm_mul_ps(rinvsq00,rinvsq00),rinvsq00);
1542 fvdw = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(c12_00,rinvsix),c6_00),_mm_mul_ps(rinvsix,rinvsq00));
1543
1544 fscal = _mm_add_ps(felec,fvdw);
1545
1546 fscal = _mm_andnot_ps(dummy_mask,fscal);
1547
1548 /* Calculate temporary vectorial force */
1549 tx = _mm_mul_ps(fscal,dx00);
1550 ty = _mm_mul_ps(fscal,dy00);
1551 tz = _mm_mul_ps(fscal,dz00);
1552
1553 /* Update vectorial force */
1554 fix0 = _mm_add_ps(fix0,tx);
1555 fiy0 = _mm_add_ps(fiy0,ty);
1556 fiz0 = _mm_add_ps(fiz0,tz);
1557
1558 fjx0 = _mm_add_ps(fjx0,tx);
1559 fjy0 = _mm_add_ps(fjy0,ty);
1560 fjz0 = _mm_add_ps(fjz0,tz);
1561
1562 /**************************
1563 * CALCULATE INTERACTIONS *
1564 **************************/
1565
1566 /* REACTION-FIELD ELECTROSTATICS */
1567 felec = _mm_mul_ps(qq01,_mm_sub_ps(_mm_mul_ps(rinv01,rinvsq01),krf2));
1568
1569 fscal = felec;
1570
1571 fscal = _mm_andnot_ps(dummy_mask,fscal);
1572
1573 /* Calculate temporary vectorial force */
1574 tx = _mm_mul_ps(fscal,dx01);
1575 ty = _mm_mul_ps(fscal,dy01);
1576 tz = _mm_mul_ps(fscal,dz01);
1577
1578 /* Update vectorial force */
1579 fix0 = _mm_add_ps(fix0,tx);
1580 fiy0 = _mm_add_ps(fiy0,ty);
1581 fiz0 = _mm_add_ps(fiz0,tz);
1582
1583 fjx1 = _mm_add_ps(fjx1,tx);
1584 fjy1 = _mm_add_ps(fjy1,ty);
1585 fjz1 = _mm_add_ps(fjz1,tz);
1586
1587 /**************************
1588 * CALCULATE INTERACTIONS *
1589 **************************/
1590
1591 /* REACTION-FIELD ELECTROSTATICS */
1592 felec = _mm_mul_ps(qq02,_mm_sub_ps(_mm_mul_ps(rinv02,rinvsq02),krf2));
1593
1594 fscal = felec;
1595
1596 fscal = _mm_andnot_ps(dummy_mask,fscal);
1597
1598 /* Calculate temporary vectorial force */
1599 tx = _mm_mul_ps(fscal,dx02);
1600 ty = _mm_mul_ps(fscal,dy02);
1601 tz = _mm_mul_ps(fscal,dz02);
1602
1603 /* Update vectorial force */
1604 fix0 = _mm_add_ps(fix0,tx);
1605 fiy0 = _mm_add_ps(fiy0,ty);
1606 fiz0 = _mm_add_ps(fiz0,tz);
1607
1608 fjx2 = _mm_add_ps(fjx2,tx);
1609 fjy2 = _mm_add_ps(fjy2,ty);
1610 fjz2 = _mm_add_ps(fjz2,tz);
1611
1612 /**************************
1613 * CALCULATE INTERACTIONS *
1614 **************************/
1615
1616 /* REACTION-FIELD ELECTROSTATICS */
1617 felec = _mm_mul_ps(qq10,_mm_sub_ps(_mm_mul_ps(rinv10,rinvsq10),krf2));
1618
1619 fscal = felec;
1620
1621 fscal = _mm_andnot_ps(dummy_mask,fscal);
1622
1623 /* Calculate temporary vectorial force */
1624 tx = _mm_mul_ps(fscal,dx10);
1625 ty = _mm_mul_ps(fscal,dy10);
1626 tz = _mm_mul_ps(fscal,dz10);
1627
1628 /* Update vectorial force */
1629 fix1 = _mm_add_ps(fix1,tx);
1630 fiy1 = _mm_add_ps(fiy1,ty);
1631 fiz1 = _mm_add_ps(fiz1,tz);
1632
1633 fjx0 = _mm_add_ps(fjx0,tx);
1634 fjy0 = _mm_add_ps(fjy0,ty);
1635 fjz0 = _mm_add_ps(fjz0,tz);
1636
1637 /**************************
1638 * CALCULATE INTERACTIONS *
1639 **************************/
1640
1641 /* REACTION-FIELD ELECTROSTATICS */
1642 felec = _mm_mul_ps(qq11,_mm_sub_ps(_mm_mul_ps(rinv11,rinvsq11),krf2));
1643
1644 fscal = felec;
1645
1646 fscal = _mm_andnot_ps(dummy_mask,fscal);
1647
1648 /* Calculate temporary vectorial force */
1649 tx = _mm_mul_ps(fscal,dx11);
1650 ty = _mm_mul_ps(fscal,dy11);
1651 tz = _mm_mul_ps(fscal,dz11);
1652
1653 /* Update vectorial force */
1654 fix1 = _mm_add_ps(fix1,tx);
1655 fiy1 = _mm_add_ps(fiy1,ty);
1656 fiz1 = _mm_add_ps(fiz1,tz);
1657
1658 fjx1 = _mm_add_ps(fjx1,tx);
1659 fjy1 = _mm_add_ps(fjy1,ty);
1660 fjz1 = _mm_add_ps(fjz1,tz);
1661
1662 /**************************
1663 * CALCULATE INTERACTIONS *
1664 **************************/
1665
1666 /* REACTION-FIELD ELECTROSTATICS */
1667 felec = _mm_mul_ps(qq12,_mm_sub_ps(_mm_mul_ps(rinv12,rinvsq12),krf2));
1668
1669 fscal = felec;
1670
1671 fscal = _mm_andnot_ps(dummy_mask,fscal);
1672
1673 /* Calculate temporary vectorial force */
1674 tx = _mm_mul_ps(fscal,dx12);
1675 ty = _mm_mul_ps(fscal,dy12);
1676 tz = _mm_mul_ps(fscal,dz12);
1677
1678 /* Update vectorial force */
1679 fix1 = _mm_add_ps(fix1,tx);
1680 fiy1 = _mm_add_ps(fiy1,ty);
1681 fiz1 = _mm_add_ps(fiz1,tz);
1682
1683 fjx2 = _mm_add_ps(fjx2,tx);
1684 fjy2 = _mm_add_ps(fjy2,ty);
1685 fjz2 = _mm_add_ps(fjz2,tz);
1686
1687 /**************************
1688 * CALCULATE INTERACTIONS *
1689 **************************/
1690
1691 /* REACTION-FIELD ELECTROSTATICS */
1692 felec = _mm_mul_ps(qq20,_mm_sub_ps(_mm_mul_ps(rinv20,rinvsq20),krf2));
1693
1694 fscal = felec;
1695
1696 fscal = _mm_andnot_ps(dummy_mask,fscal);
1697
1698 /* Calculate temporary vectorial force */
1699 tx = _mm_mul_ps(fscal,dx20);
1700 ty = _mm_mul_ps(fscal,dy20);
1701 tz = _mm_mul_ps(fscal,dz20);
1702
1703 /* Update vectorial force */
1704 fix2 = _mm_add_ps(fix2,tx);
1705 fiy2 = _mm_add_ps(fiy2,ty);
1706 fiz2 = _mm_add_ps(fiz2,tz);
1707
1708 fjx0 = _mm_add_ps(fjx0,tx);
1709 fjy0 = _mm_add_ps(fjy0,ty);
1710 fjz0 = _mm_add_ps(fjz0,tz);
1711
1712 /**************************
1713 * CALCULATE INTERACTIONS *
1714 **************************/
1715
1716 /* REACTION-FIELD ELECTROSTATICS */
1717 felec = _mm_mul_ps(qq21,_mm_sub_ps(_mm_mul_ps(rinv21,rinvsq21),krf2));
1718
1719 fscal = felec;
1720
1721 fscal = _mm_andnot_ps(dummy_mask,fscal);
1722
1723 /* Calculate temporary vectorial force */
1724 tx = _mm_mul_ps(fscal,dx21);
1725 ty = _mm_mul_ps(fscal,dy21);
1726 tz = _mm_mul_ps(fscal,dz21);
1727
1728 /* Update vectorial force */
1729 fix2 = _mm_add_ps(fix2,tx);
1730 fiy2 = _mm_add_ps(fiy2,ty);
1731 fiz2 = _mm_add_ps(fiz2,tz);
1732
1733 fjx1 = _mm_add_ps(fjx1,tx);
1734 fjy1 = _mm_add_ps(fjy1,ty);
1735 fjz1 = _mm_add_ps(fjz1,tz);
1736
1737 /**************************
1738 * CALCULATE INTERACTIONS *
1739 **************************/
1740
1741 /* REACTION-FIELD ELECTROSTATICS */
1742 felec = _mm_mul_ps(qq22,_mm_sub_ps(_mm_mul_ps(rinv22,rinvsq22),krf2));
1743
1744 fscal = felec;
1745
1746 fscal = _mm_andnot_ps(dummy_mask,fscal);
1747
1748 /* Calculate temporary vectorial force */
1749 tx = _mm_mul_ps(fscal,dx22);
1750 ty = _mm_mul_ps(fscal,dy22);
1751 tz = _mm_mul_ps(fscal,dz22);
1752
1753 /* Update vectorial force */
1754 fix2 = _mm_add_ps(fix2,tx);
1755 fiy2 = _mm_add_ps(fiy2,ty);
1756 fiz2 = _mm_add_ps(fiz2,tz);
1757
1758 fjx2 = _mm_add_ps(fjx2,tx);
1759 fjy2 = _mm_add_ps(fjy2,ty);
1760 fjz2 = _mm_add_ps(fjz2,tz);
1761
1762 fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
1763 fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
1764 fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
1765 fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
1766
1767 gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
1768 fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1769
1770 /* Inner loop uses 250 flops */
1771 }
1772
1773 /* End of innermost loop */
1774
1775 gmx_mm_update_iforce_3atom_swizzle_ps(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1776 f+i_coord_offset,fshift+i_shift_offset);
1777
1778 /* Increment number of inner iterations */
1779 inneriter += j_index_end - j_index_start;
1780
1781 /* Outer loop uses 18 flops */
1782 }
1783
1784 /* Increment number of outer iterations */
1785 outeriter += nri;
1786
1787 /* Update outer/inner flops */
1788
1789 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*250)(nrnb)->n[eNR_NBKERNEL_ELEC_VDW_W3W3_F] += outeriter*18 + inneriter
*250
;
1790}