2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/legacyheaders/vec.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: ReactionField
54 * VdW interaction: None
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
/*
 * Reaction-field (cut-off) electrostatics kernel for water3-water3 pairs that
 * accumulates both the electrostatic potential (velecsum) and the forces.
 *
 * Parameters, as used by the lines visible in this view:
 *   nlist       - neighbour list; jindex, shift and iinr[0] are read from it.
 *   xx, ff      - coordinate and force arrays. NOTE(review): the statements
 *                 that derive the flat pointers x and f are not visible in
 *                 this view - confirm against the complete file.
 *   fr          - force record; shift_vec, fshift, epsfac, rcoulomb and the
 *                 reaction-field constants ic->k_rf and ic->c_rf are read.
 *   mdatoms     - chargeA is used as the per-atom charge array.
 *   kernel_data - energygrp_elec receives the accumulated potential.
 *   nrnb        - flop counter, updated through inc_nrnb().
 *
 * NOTE(review): this view does not show the return type, any braces, or the
 * declarations/assignments of several names used below (e.g. charge,
 * rcutoff_scalar, jnrA, jnrB, nri, x, f, ggid, outeriter, inneriter).
 * They are presumably on lines outside this view - confirm.
 * NOTE(review): the suffix comment below speaks of "four positions" in the
 * SIMD register, while only the A and B suffixes are used here - confirm.
 */
59 nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the four positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 dummy_mask,cutoff_mask;
106 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
107 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
108 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115 jindex = nlist->jindex;
117 shiftidx = nlist->shift;
119 shiftvec = fr->shift_vec[0];
120 fshift = fr->fshift[0];
121 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
122 charge = mdatoms->chargeA;
123 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
124 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
125 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
127 /* Setup water-specific parameters */
/* The three i charges are taken from the first i water in the list and pre-scaled by facel (fr->epsfac). */
128 inr = nlist->iinr[0];
129 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
130 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
131 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
/* The j charges are read from the same three atoms (inr+0..2), so the nine
 * charge products qqIJ below are computed once, before the loops start. */
133 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
134 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
135 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
136 qq00 = _fjsp_mul_v2r8(iq0,jq0);
137 qq01 = _fjsp_mul_v2r8(iq0,jq1);
138 qq02 = _fjsp_mul_v2r8(iq0,jq2);
139 qq10 = _fjsp_mul_v2r8(iq1,jq0);
140 qq11 = _fjsp_mul_v2r8(iq1,jq1);
141 qq12 = _fjsp_mul_v2r8(iq1,jq2);
142 qq20 = _fjsp_mul_v2r8(iq2,jq0);
143 qq21 = _fjsp_mul_v2r8(iq2,jq1);
144 qq22 = _fjsp_mul_v2r8(iq2,jq2);
146 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
147 rcutoff_scalar = fr->rcoulomb;
148 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
/* The squared cutoff is what the interaction code compares against rsqIJ. */
149 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
151 /* Avoid stupid compiler warnings */
159 /* Start outer loop over neighborlists */
160 for(iidx=0; iidx<nri; iidx++)
/* Each outer iteration handles one i water; iidx indexes both shiftidx[] and jindex[]. */
162 /* Load shift vector for this list */
163 i_shift_offset = DIM*shiftidx[iidx];
165 /* Load limits for loop over neighbors */
/* The j entries of this i water are jindex[iidx] up to, but not including, jindex[iidx+1]. */
166 j_index_start = jindex[iidx];
167 j_index_end = jindex[iidx+1];
169 /* Get outer coordinate index */
/* NOTE(review): a per-iteration assignment of inr is not visible in this view - confirm that one precedes this line. */
171 i_coord_offset = DIM*inr;
173 /* Load i particle coords and add shift vector */
/* Fills ix0..iz2 for the three i atoms (nine outputs); per the helper's name the shift is added and the values are broadcast. */
174 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
175 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
/* The i-force accumulators start from zero for every i water. */
177 fix0 = _fjsp_setzero_v2r8();
178 fiy0 = _fjsp_setzero_v2r8();
179 fiz0 = _fjsp_setzero_v2r8();
180 fix1 = _fjsp_setzero_v2r8();
181 fiy1 = _fjsp_setzero_v2r8();
182 fiz1 = _fjsp_setzero_v2r8();
183 fix2 = _fjsp_setzero_v2r8();
184 fiy2 = _fjsp_setzero_v2r8();
185 fiz2 = _fjsp_setzero_v2r8();
187 /* Reset potential sums */
188 velecsum = _fjsp_setzero_v2r8();
190 /* Start inner kernel loop */
/* Two j waters (A and B) are handled per iteration. The bound j_index_end-1
 * means an odd last entry is not processed by this loop. */
191 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
194 /* Get j neighbor index, and coordinate index */
/* NOTE(review): the assignments of jnrA and jnrB are not visible in this view - confirm. */
197 j_coord_offsetA = DIM*jnrA;
198 j_coord_offsetB = DIM*jnrB;
200 /* load j atom coordinates */
201 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
202 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
204 /* Calculate displacement vector */
/* dIJ = (i atom I) - (j atom J), for all nine atom pairs of the two waters. */
205 dx00 = _fjsp_sub_v2r8(ix0,jx0);
206 dy00 = _fjsp_sub_v2r8(iy0,jy0);
207 dz00 = _fjsp_sub_v2r8(iz0,jz0);
208 dx01 = _fjsp_sub_v2r8(ix0,jx1);
209 dy01 = _fjsp_sub_v2r8(iy0,jy1);
210 dz01 = _fjsp_sub_v2r8(iz0,jz1);
211 dx02 = _fjsp_sub_v2r8(ix0,jx2);
212 dy02 = _fjsp_sub_v2r8(iy0,jy2);
213 dz02 = _fjsp_sub_v2r8(iz0,jz2);
214 dx10 = _fjsp_sub_v2r8(ix1,jx0);
215 dy10 = _fjsp_sub_v2r8(iy1,jy0);
216 dz10 = _fjsp_sub_v2r8(iz1,jz0);
217 dx11 = _fjsp_sub_v2r8(ix1,jx1);
218 dy11 = _fjsp_sub_v2r8(iy1,jy1);
219 dz11 = _fjsp_sub_v2r8(iz1,jz1);
220 dx12 = _fjsp_sub_v2r8(ix1,jx2);
221 dy12 = _fjsp_sub_v2r8(iy1,jy2);
222 dz12 = _fjsp_sub_v2r8(iz1,jz2);
223 dx20 = _fjsp_sub_v2r8(ix2,jx0);
224 dy20 = _fjsp_sub_v2r8(iy2,jy0);
225 dz20 = _fjsp_sub_v2r8(iz2,jz0);
226 dx21 = _fjsp_sub_v2r8(ix2,jx1);
227 dy21 = _fjsp_sub_v2r8(iy2,jy1);
228 dz21 = _fjsp_sub_v2r8(iz2,jz1);
229 dx22 = _fjsp_sub_v2r8(ix2,jx2);
230 dy22 = _fjsp_sub_v2r8(iy2,jy2);
231 dz22 = _fjsp_sub_v2r8(iz2,jz2);
233 /* Calculate squared distance and things based on it */
234 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
235 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
236 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
237 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
238 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
239 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
240 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
241 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
242 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
244 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
245 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
246 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
247 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
248 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
249 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
250 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
251 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
252 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
254 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
255 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
256 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
257 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
258 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
259 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
260 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
261 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
262 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
/* The j-force accumulators are reset for every j pair and written out once, after the ninth interaction. */
264 fjx0 = _fjsp_setzero_v2r8();
265 fjy0 = _fjsp_setzero_v2r8();
266 fjz0 = _fjsp_setzero_v2r8();
267 fjx1 = _fjsp_setzero_v2r8();
268 fjy1 = _fjsp_setzero_v2r8();
269 fjz1 = _fjsp_setzero_v2r8();
270 fjx2 = _fjsp_setzero_v2r8();
271 fjy2 = _fjsp_setzero_v2r8();
272 fjz2 = _fjsp_setzero_v2r8();
274 /**************************
275 * CALCULATE INTERACTIONS *
276 **************************/
/* The same pattern repeats for all nine atom pairs below; only the 00..22 suffixes change.
 * Going by the helper's name, the work is done when any lane is inside the cutoff;
 * lanes outside it are then removed with cutoff_mask. */
278 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
281 /* REACTION-FIELD ELECTROSTATICS */
/* Assuming madd(a,b,c)=a*b+c and msub(a,b,c)=a*b-c (TODO confirm against the intrinsics):
 * velec = qq*(rinv + krf*rsq - crf) and felec = qq*(rinv*rinvsq - krf2), with krf2 = 2*k_rf. */
282 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
283 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
285 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
287 /* Update potential sum for this i atom from the interaction with this j atom. */
288 velec = _fjsp_and_v2r8(velec,cutoff_mask);
289 velecsum = _fjsp_add_v2r8(velecsum,velec);
/* NOTE(review): no assignment to fscal is visible before this line in this view;
 * presumably it is set from felec on a line not shown - confirm. */
293 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
295 /* Update vectorial force */
296 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
297 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
298 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
/* The same d*fscal term is accumulated for the j atom; the j sums are handed to a "decrement" helper after the loop body. */
300 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
301 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
302 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
306 /**************************
307 * CALCULATE INTERACTIONS *
308 **************************/
310 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
313 /* REACTION-FIELD ELECTROSTATICS */
314 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
315 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
317 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
319 /* Update potential sum for this i atom from the interaction with this j atom. */
320 velec = _fjsp_and_v2r8(velec,cutoff_mask);
321 velecsum = _fjsp_add_v2r8(velecsum,velec);
325 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
327 /* Update vectorial force */
328 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
329 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
330 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
332 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
333 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
334 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
338 /**************************
339 * CALCULATE INTERACTIONS *
340 **************************/
342 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
345 /* REACTION-FIELD ELECTROSTATICS */
346 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
347 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
349 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
351 /* Update potential sum for this i atom from the interaction with this j atom. */
352 velec = _fjsp_and_v2r8(velec,cutoff_mask);
353 velecsum = _fjsp_add_v2r8(velecsum,velec);
357 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
359 /* Update vectorial force */
360 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
361 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
362 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
364 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
365 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
366 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
370 /**************************
371 * CALCULATE INTERACTIONS *
372 **************************/
374 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
377 /* REACTION-FIELD ELECTROSTATICS */
378 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
379 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
381 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
383 /* Update potential sum for this i atom from the interaction with this j atom. */
384 velec = _fjsp_and_v2r8(velec,cutoff_mask);
385 velecsum = _fjsp_add_v2r8(velecsum,velec);
389 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
391 /* Update vectorial force */
392 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
393 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
394 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
396 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
397 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
398 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
402 /**************************
403 * CALCULATE INTERACTIONS *
404 **************************/
406 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
409 /* REACTION-FIELD ELECTROSTATICS */
410 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
411 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
413 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
415 /* Update potential sum for this i atom from the interaction with this j atom. */
416 velec = _fjsp_and_v2r8(velec,cutoff_mask);
417 velecsum = _fjsp_add_v2r8(velecsum,velec);
421 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
423 /* Update vectorial force */
424 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
425 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
426 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
428 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
429 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
430 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
434 /**************************
435 * CALCULATE INTERACTIONS *
436 **************************/
438 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
441 /* REACTION-FIELD ELECTROSTATICS */
442 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
443 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
445 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
447 /* Update potential sum for this i atom from the interaction with this j atom. */
448 velec = _fjsp_and_v2r8(velec,cutoff_mask);
449 velecsum = _fjsp_add_v2r8(velecsum,velec);
453 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
455 /* Update vectorial force */
456 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
457 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
458 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
460 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
461 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
462 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
466 /**************************
467 * CALCULATE INTERACTIONS *
468 **************************/
470 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
473 /* REACTION-FIELD ELECTROSTATICS */
474 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
475 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
477 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
479 /* Update potential sum for this i atom from the interaction with this j atom. */
480 velec = _fjsp_and_v2r8(velec,cutoff_mask);
481 velecsum = _fjsp_add_v2r8(velecsum,velec);
485 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
487 /* Update vectorial force */
488 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
489 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
490 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
492 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
493 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
494 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
498 /**************************
499 * CALCULATE INTERACTIONS *
500 **************************/
502 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
505 /* REACTION-FIELD ELECTROSTATICS */
506 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
507 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
509 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
511 /* Update potential sum for this i atom from the interaction with this j atom. */
512 velec = _fjsp_and_v2r8(velec,cutoff_mask);
513 velecsum = _fjsp_add_v2r8(velecsum,velec);
517 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
519 /* Update vectorial force */
520 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
521 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
522 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
524 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
525 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
526 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
530 /**************************
531 * CALCULATE INTERACTIONS *
532 **************************/
534 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
537 /* REACTION-FIELD ELECTROSTATICS */
538 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
539 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
541 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
543 /* Update potential sum for this i atom from the interaction with this j atom. */
544 velec = _fjsp_and_v2r8(velec,cutoff_mask);
545 velecsum = _fjsp_add_v2r8(velecsum,velec);
549 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
551 /* Update vectorial force */
552 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
553 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
554 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
556 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
557 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
558 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
/* Write the accumulated j forces for waters A and B back into f; per the helper's name they are subtracted. */
562 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
564 /* Inner loop uses 351 flops */
/* Single-j pass: only the A index is used, coordinates come from the 1ptr loader and
 * forces go back through the 1ptr decrement helper.
 * NOTE(review): the condition guarding this pass and the assignment of jnrA are not
 * visible in this view; presumably it handles the entry left over by the two-way
 * unrolled loop - confirm. */
571 j_coord_offsetA = DIM*jnrA;
573 /* load j atom coordinates */
574 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
575 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
577 /* Calculate displacement vector */
578 dx00 = _fjsp_sub_v2r8(ix0,jx0);
579 dy00 = _fjsp_sub_v2r8(iy0,jy0);
580 dz00 = _fjsp_sub_v2r8(iz0,jz0);
581 dx01 = _fjsp_sub_v2r8(ix0,jx1);
582 dy01 = _fjsp_sub_v2r8(iy0,jy1);
583 dz01 = _fjsp_sub_v2r8(iz0,jz1);
584 dx02 = _fjsp_sub_v2r8(ix0,jx2);
585 dy02 = _fjsp_sub_v2r8(iy0,jy2);
586 dz02 = _fjsp_sub_v2r8(iz0,jz2);
587 dx10 = _fjsp_sub_v2r8(ix1,jx0);
588 dy10 = _fjsp_sub_v2r8(iy1,jy0);
589 dz10 = _fjsp_sub_v2r8(iz1,jz0);
590 dx11 = _fjsp_sub_v2r8(ix1,jx1);
591 dy11 = _fjsp_sub_v2r8(iy1,jy1);
592 dz11 = _fjsp_sub_v2r8(iz1,jz1);
593 dx12 = _fjsp_sub_v2r8(ix1,jx2);
594 dy12 = _fjsp_sub_v2r8(iy1,jy2);
595 dz12 = _fjsp_sub_v2r8(iz1,jz2);
596 dx20 = _fjsp_sub_v2r8(ix2,jx0);
597 dy20 = _fjsp_sub_v2r8(iy2,jy0);
598 dz20 = _fjsp_sub_v2r8(iz2,jz0);
599 dx21 = _fjsp_sub_v2r8(ix2,jx1);
600 dy21 = _fjsp_sub_v2r8(iy2,jy1);
601 dz21 = _fjsp_sub_v2r8(iz2,jz1);
602 dx22 = _fjsp_sub_v2r8(ix2,jx2);
603 dy22 = _fjsp_sub_v2r8(iy2,jy2);
604 dz22 = _fjsp_sub_v2r8(iz2,jz2);
606 /* Calculate squared distance and things based on it */
607 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
608 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
609 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
610 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
611 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
612 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
613 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
614 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
615 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
617 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
618 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
619 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
620 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
621 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
622 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
623 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
624 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
625 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
627 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
628 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
629 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
630 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
631 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
632 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
633 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
634 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
635 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
637 fjx0 = _fjsp_setzero_v2r8();
638 fjy0 = _fjsp_setzero_v2r8();
639 fjz0 = _fjsp_setzero_v2r8();
640 fjx1 = _fjsp_setzero_v2r8();
641 fjy1 = _fjsp_setzero_v2r8();
642 fjz1 = _fjsp_setzero_v2r8();
643 fjx2 = _fjsp_setzero_v2r8();
644 fjy2 = _fjsp_setzero_v2r8();
645 fjz2 = _fjsp_setzero_v2r8();
647 /**************************
648 * CALCULATE INTERACTIONS *
649 **************************/
/* Same nine-pair pattern as the two-way loop, with one extra step per pair:
 * velec and fscal are combined with a zero vector through unpacklo before being accumulated.
 * Presumably this keeps the low (A) lane and clears the unused high lane - TODO confirm. */
651 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
654 /* REACTION-FIELD ELECTROSTATICS */
655 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
656 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
658 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
660 /* Update potential sum for this i atom from the interaction with this j atom. */
661 velec = _fjsp_and_v2r8(velec,cutoff_mask);
662 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
663 velecsum = _fjsp_add_v2r8(velecsum,velec);
/* NOTE(review): no assignment to fscal is visible before this line in this view;
 * presumably it is set from felec on a line not shown - confirm. */
667 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
669 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
671 /* Update vectorial force */
672 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
673 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
674 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
676 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
677 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
678 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
682 /**************************
683 * CALCULATE INTERACTIONS *
684 **************************/
686 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
689 /* REACTION-FIELD ELECTROSTATICS */
690 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
691 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
693 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
695 /* Update potential sum for this i atom from the interaction with this j atom. */
696 velec = _fjsp_and_v2r8(velec,cutoff_mask);
697 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
698 velecsum = _fjsp_add_v2r8(velecsum,velec);
702 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
704 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
706 /* Update vectorial force */
707 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
708 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
709 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
711 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
712 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
713 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
717 /**************************
718 * CALCULATE INTERACTIONS *
719 **************************/
721 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
724 /* REACTION-FIELD ELECTROSTATICS */
725 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
726 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
728 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
730 /* Update potential sum for this i atom from the interaction with this j atom. */
731 velec = _fjsp_and_v2r8(velec,cutoff_mask);
732 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
733 velecsum = _fjsp_add_v2r8(velecsum,velec);
737 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
739 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
741 /* Update vectorial force */
742 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
743 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
744 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
746 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
747 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
748 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
752 /**************************
753 * CALCULATE INTERACTIONS *
754 **************************/
756 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
759 /* REACTION-FIELD ELECTROSTATICS */
760 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
761 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
763 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
765 /* Update potential sum for this i atom from the interaction with this j atom. */
766 velec = _fjsp_and_v2r8(velec,cutoff_mask);
767 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
768 velecsum = _fjsp_add_v2r8(velecsum,velec);
772 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
774 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
776 /* Update vectorial force */
777 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
778 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
779 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
781 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
782 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
783 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
787 /**************************
788 * CALCULATE INTERACTIONS *
789 **************************/
791 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
794 /* REACTION-FIELD ELECTROSTATICS */
795 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
796 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
798 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
800 /* Update potential sum for this i atom from the interaction with this j atom. */
801 velec = _fjsp_and_v2r8(velec,cutoff_mask);
802 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
803 velecsum = _fjsp_add_v2r8(velecsum,velec);
807 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
809 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
811 /* Update vectorial force */
812 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
813 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
814 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
816 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
817 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
818 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
822 /**************************
823 * CALCULATE INTERACTIONS *
824 **************************/
826 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
829 /* REACTION-FIELD ELECTROSTATICS */
830 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
831 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
833 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
835 /* Update potential sum for this i atom from the interaction with this j atom. */
836 velec = _fjsp_and_v2r8(velec,cutoff_mask);
837 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
838 velecsum = _fjsp_add_v2r8(velecsum,velec);
842 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
844 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
846 /* Update vectorial force */
847 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
848 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
849 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
851 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
852 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
853 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
857 /**************************
858 * CALCULATE INTERACTIONS *
859 **************************/
861 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
864 /* REACTION-FIELD ELECTROSTATICS */
865 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
866 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
868 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
870 /* Update potential sum for this i atom from the interaction with this j atom. */
871 velec = _fjsp_and_v2r8(velec,cutoff_mask);
872 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
873 velecsum = _fjsp_add_v2r8(velecsum,velec);
877 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
879 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
881 /* Update vectorial force */
882 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
883 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
884 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
886 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
887 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
888 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
892 /**************************
893 * CALCULATE INTERACTIONS *
894 **************************/
896 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
899 /* REACTION-FIELD ELECTROSTATICS */
900 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
901 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
903 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
905 /* Update potential sum for this i atom from the interaction with this j atom. */
906 velec = _fjsp_and_v2r8(velec,cutoff_mask);
907 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
908 velecsum = _fjsp_add_v2r8(velecsum,velec);
912 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
914 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
916 /* Update vectorial force */
917 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
918 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
919 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
921 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
922 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
923 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
927 /**************************
928 * CALCULATE INTERACTIONS *
929 **************************/
931 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
934 /* REACTION-FIELD ELECTROSTATICS */
935 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
936 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
938 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
940 /* Update potential sum for this i atom from the interaction with this j atom. */
941 velec = _fjsp_and_v2r8(velec,cutoff_mask);
942 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
943 velecsum = _fjsp_add_v2r8(velecsum,velec);
947 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
949 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
951 /* Update vectorial force */
952 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
953 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
954 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
956 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
957 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
958 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
/* Only one force pointer (water A) is updated in this pass. */
962 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
964 /* Inner loop uses 351 flops */
967 /* End of innermost loop */
/* Hand the nine accumulated i-force components to the update helper, together with
 * this i water's slot in f and the matching shift-force slot in fshift. */
969 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
970 f+i_coord_offset,fshift+i_shift_offset);
973 /* Update potential energies */
/* NOTE(review): the assignment of ggid is not visible in this view - confirm it is set per i water. */
974 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
976 /* Increment number of inner iterations */
/* Counts j entries of this i water, not SIMD loop iterations. */
977 inneriter += j_index_end - j_index_start;
979 /* Outer loop uses 19 flops */
982 /* Increment number of outer iterations */
985 /* Update outer/inner flops */
/* Reported flop total: 19 per outer iteration plus 351 per counted inner entry.
 * NOTE(review): the initialisation of inneriter and the update of outeriter are not visible in this view - confirm. */
987 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*351);
990 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
991 * Electrostatics interaction: ReactionField
992 * VdW interaction: None
993 * Geometry: Water3-Water3
994 * Calculate force/pot: Force
/*
 * Force-only kernel: reaction-field electrostatics with a cutoff, no VdW,
 * for water3-water3 pairs, written with the _fjsp_v2r8 SIMD helpers.
 *
 * nlist       - neighbor list; jindex, shift and iinr are read from it below.
 * xx, ff      - coordinate and force arrays. NOTE(review): presumably reached
 *               through the locals x and f; that assignment is not visible in
 *               this excerpt.
 * fr          - supplies shift_vec, fshift, epsfac, ic->k_rf, ic->c_rf and rcoulomb.
 * mdatoms     - supplies the chargeA array.
 * kernel_data - marked gmx_unused; not read in the lines shown here.
 * nrnb        - receives the flop estimate through inc_nrnb() at the end.
 *
 * The inner loop consumes two j entries per pass (suffixes A and B); a tail
 * block after the loop handles one remaining entry. NOTE(review): the
 * declaration comment below speaks of four SIMD positions, while the code
 * shown here only ever handles the two entries A and B.
 */
997 nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
998 (t_nblist * gmx_restrict nlist,
999 rvec * gmx_restrict xx,
1000 rvec * gmx_restrict ff,
1001 t_forcerec * gmx_restrict fr,
1002 t_mdatoms * gmx_restrict mdatoms,
1003 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1004 t_nrnb * gmx_restrict nrnb)
1006 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1007 * just 0 for non-waters.
1008 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1009 jnr indices corresponding to data put in the four positions in the SIMD register.
1011 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1012 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1014 int j_coord_offsetA,j_coord_offsetB;
1015 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1016 real rcutoff_scalar;
1017 real *shiftvec,*fshift,*x,*f;
1018 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1020 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1022 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1024 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1025 int vdwjidx0A,vdwjidx0B;
1026 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1027 int vdwjidx1A,vdwjidx1B;
1028 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1029 int vdwjidx2A,vdwjidx2B;
1030 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1031 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1032 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1033 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1034 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1035 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1036 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1037 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1038 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1039 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1040 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1042 _fjsp_v2r8 itab_tmp;
1043 _fjsp_v2r8 dummy_mask,cutoff_mask;
1044 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1045 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1046 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1053 jindex = nlist->jindex;
1055 shiftidx = nlist->shift;
1057 shiftvec = fr->shift_vec[0];
1058 fshift = fr->fshift[0];
1059 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1060 charge = mdatoms->chargeA;
1061 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1062 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1063 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1065 /* Setup water-specific parameters */
/* The i charges are read once, from the first i entry of the list (iinr[0]), and pre-scaled by facel */
1066 inr = nlist->iinr[0];
1067 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1068 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1069 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
/* The j charges come from the same three atoms, so the nine qq products are computed once, ahead of the loops */
1071 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1072 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1073 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1074 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1075 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1076 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1077 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1078 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1079 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1080 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1081 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1082 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1084 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1085 rcutoff_scalar = fr->rcoulomb;
1086 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
/* Squared cutoff, compared directly against the squared distances below */
1087 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1089 /* Avoid stupid compiler warnings */
/* (the j offsets get a defined value of 0 before the loops) */
1091 j_coord_offsetA = 0;
1092 j_coord_offsetB = 0;
1097 /* Start outer loop over neighborlists */
1098 for(iidx=0; iidx<nri; iidx++)
1100 /* Load shift vector for this list */
1101 i_shift_offset = DIM*shiftidx[iidx];
1103 /* Load limits for loop over neighbors */
1104 j_index_start = jindex[iidx];
1105 j_index_end = jindex[iidx+1];
1107 /* Get outer coordinate index */
/* NOTE(review): the per-iteration load of inr is not visible in this excerpt */
1109 i_coord_offset = DIM*inr;
1111 /* Load i particle coords and add shift vector */
1112 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1113 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
/* Reset the i-force accumulators for this i water */
1115 fix0 = _fjsp_setzero_v2r8();
1116 fiy0 = _fjsp_setzero_v2r8();
1117 fiz0 = _fjsp_setzero_v2r8();
1118 fix1 = _fjsp_setzero_v2r8();
1119 fiy1 = _fjsp_setzero_v2r8();
1120 fiz1 = _fjsp_setzero_v2r8();
1121 fix2 = _fjsp_setzero_v2r8();
1122 fiy2 = _fjsp_setzero_v2r8();
1123 fiz2 = _fjsp_setzero_v2r8();
1125 /* Start inner kernel loop */
/* Two j entries per pass; the bound j_index_end-1 leaves a single leftover entry for the tail block after the loop */
1126 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1129 /* Get j neighbor index, and coordinate index */
1131 jnrB = jjnr[jidx+1];
1132 j_coord_offsetA = DIM*jnrA;
1133 j_coord_offsetB = DIM*jnrB;
1135 /* load j atom coordinates */
1136 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1137 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1139 /* Calculate displacement vector */
/* dXab is the i-atom-a minus j-atom-b component, for all nine atom pairs */
1140 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1141 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1142 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1143 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1144 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1145 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1146 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1147 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1148 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1149 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1150 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1151 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1152 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1153 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1154 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1155 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1156 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1157 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1158 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1159 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1160 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1161 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1162 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1163 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1164 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1165 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1166 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1168 /* Calculate squared distance and things based on it */
1169 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1170 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1171 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1172 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1173 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1174 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1175 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1176 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1177 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1179 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1180 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1181 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1182 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1183 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1184 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1185 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1186 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1187 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1189 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1190 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1191 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1192 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1193 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1194 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1195 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1196 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1197 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
/* Clear the j-force accumulators before visiting the nine pairs */
1199 fjx0 = _fjsp_setzero_v2r8();
1200 fjy0 = _fjsp_setzero_v2r8();
1201 fjz0 = _fjsp_setzero_v2r8();
1202 fjx1 = _fjsp_setzero_v2r8();
1203 fjy1 = _fjsp_setzero_v2r8();
1204 fjz1 = _fjsp_setzero_v2r8();
1205 fjx2 = _fjsp_setzero_v2r8();
1206 fjy2 = _fjsp_setzero_v2r8();
1207 fjz2 = _fjsp_setzero_v2r8();
1209 /**************************
1210 * CALCULATE INTERACTIONS *
1211 **************************/
/* Pair i0-j0: only evaluated when gmx_fjsp_any_lt reports rsq00 below rcutoff2 (presumably in at least one lane) */
1213 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1216 /* REACTION-FIELD ELECTROSTATICS */
/* Force scalar built from qq00, rinv00*rinvsq00 and krf2 (= 2*k_rf); presumably qq*(rinv^3 - 2*k_rf), confirm the msub operand order */
1217 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1219 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
/* NOTE(review): fscal is masked with the cutoff comparison below, but the statement assigning it is not visible in this excerpt */
1223 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1225 /* Update vectorial force */
1226 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1227 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1228 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
/* The same increment is accumulated for j; the j sums are later passed to the "decrement" helper */
1230 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1231 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1232 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1236 /**************************
1237 * CALCULATE INTERACTIONS *
1238 **************************/
/* Pair i0-j1 */
1240 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1243 /* REACTION-FIELD ELECTROSTATICS */
1244 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1246 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1250 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1252 /* Update vectorial force */
1253 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1254 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1255 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1257 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1258 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1259 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1263 /**************************
1264 * CALCULATE INTERACTIONS *
1265 **************************/
/* Pair i0-j2 */
1267 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1270 /* REACTION-FIELD ELECTROSTATICS */
1271 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1273 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1277 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1279 /* Update vectorial force */
1280 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1281 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1282 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1284 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1285 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1286 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1290 /**************************
1291 * CALCULATE INTERACTIONS *
1292 **************************/
/* Pair i1-j0 */
1294 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1297 /* REACTION-FIELD ELECTROSTATICS */
1298 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1300 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1304 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1306 /* Update vectorial force */
1307 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1308 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1309 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1311 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1312 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1313 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1317 /**************************
1318 * CALCULATE INTERACTIONS *
1319 **************************/
/* Pair i1-j1 */
1321 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1324 /* REACTION-FIELD ELECTROSTATICS */
1325 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1327 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1331 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1333 /* Update vectorial force */
1334 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1335 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1336 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1338 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1339 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1340 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1344 /**************************
1345 * CALCULATE INTERACTIONS *
1346 **************************/
/* Pair i1-j2 */
1348 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1351 /* REACTION-FIELD ELECTROSTATICS */
1352 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1354 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1358 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1360 /* Update vectorial force */
1361 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1362 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1363 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1365 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1366 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1367 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1371 /**************************
1372 * CALCULATE INTERACTIONS *
1373 **************************/
/* Pair i2-j0 */
1375 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1378 /* REACTION-FIELD ELECTROSTATICS */
1379 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1381 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1385 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1387 /* Update vectorial force */
1388 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1389 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1390 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1392 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1393 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1394 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1398 /**************************
1399 * CALCULATE INTERACTIONS *
1400 **************************/
/* Pair i2-j1 */
1402 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1405 /* REACTION-FIELD ELECTROSTATICS */
1406 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1408 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1412 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1414 /* Update vectorial force */
1415 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1416 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1417 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1419 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1420 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1421 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1425 /**************************
1426 * CALCULATE INTERACTIONS *
1427 **************************/
/* Pair i2-j2 */
1429 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1432 /* REACTION-FIELD ELECTROSTATICS */
1433 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1435 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1439 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1441 /* Update vectorial force */
1442 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1443 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1444 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1446 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1447 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1448 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
/* Hand the accumulated j forces for both j entries (A and B) to the two-pointer decrement helper */
1452 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1454 /* Inner loop uses 297 flops */
/* Tail: taken when one j entry is left over after the paired loop above */
1457 if(jidx<j_index_end)
1461 j_coord_offsetA = DIM*jnrA;
1463 /* load j atom coordinates */
/* Single-pointer load: only the A entry is read here */
1464 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1465 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1467 /* Calculate displacement vector */
1468 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1469 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1470 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1471 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1472 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1473 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1474 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1475 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1476 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1477 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1478 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1479 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1480 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1481 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1482 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1483 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1484 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1485 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1486 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1487 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1488 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1489 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1490 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1491 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1492 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1493 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1494 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1496 /* Calculate squared distance and things based on it */
1497 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1498 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1499 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1500 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1501 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1502 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1503 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1504 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1505 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1507 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1508 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1509 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1510 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1511 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1512 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1513 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1514 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1515 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1517 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1518 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1519 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1520 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1521 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1522 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1523 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1524 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1525 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
/* Clear the j-force accumulators for the single remaining entry */
1527 fjx0 = _fjsp_setzero_v2r8();
1528 fjy0 = _fjsp_setzero_v2r8();
1529 fjz0 = _fjsp_setzero_v2r8();
1530 fjx1 = _fjsp_setzero_v2r8();
1531 fjy1 = _fjsp_setzero_v2r8();
1532 fjz1 = _fjsp_setzero_v2r8();
1533 fjx2 = _fjsp_setzero_v2r8();
1534 fjy2 = _fjsp_setzero_v2r8();
1535 fjz2 = _fjsp_setzero_v2r8();
1537 /**************************
1538 * CALCULATE INTERACTIONS *
1539 **************************/
/* Pair i0-j0 (tail) */
1541 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1544 /* REACTION-FIELD ELECTROSTATICS */
1545 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1547 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1551 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
/* Extra step compared with the paired loop: presumably keeps only the low lane of fscal and zeroes the other, since just one j entry is valid here - confirm against the unpacklo intrinsic */
1553 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1555 /* Update vectorial force */
1556 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1557 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1558 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1560 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1561 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1562 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1566 /**************************
1567 * CALCULATE INTERACTIONS *
1568 **************************/
/* Pair i0-j1 (tail) */
1570 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1573 /* REACTION-FIELD ELECTROSTATICS */
1574 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1576 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1580 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1582 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1584 /* Update vectorial force */
1585 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1586 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1587 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1589 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1590 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1591 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1595 /**************************
1596 * CALCULATE INTERACTIONS *
1597 **************************/
/* Pair i0-j2 (tail) */
1599 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1602 /* REACTION-FIELD ELECTROSTATICS */
1603 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1605 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1609 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1611 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1613 /* Update vectorial force */
1614 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1615 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1616 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1618 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1619 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1620 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1624 /**************************
1625 * CALCULATE INTERACTIONS *
1626 **************************/
/* Pair i1-j0 (tail) */
1628 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1631 /* REACTION-FIELD ELECTROSTATICS */
1632 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1634 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1638 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1640 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1642 /* Update vectorial force */
1643 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1644 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1645 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1647 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1648 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1649 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1653 /**************************
1654 * CALCULATE INTERACTIONS *
1655 **************************/
/* Pair i1-j1 (tail) */
1657 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1660 /* REACTION-FIELD ELECTROSTATICS */
1661 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1663 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1667 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1669 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1671 /* Update vectorial force */
1672 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1673 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1674 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1676 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1677 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1678 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1682 /**************************
1683 * CALCULATE INTERACTIONS *
1684 **************************/
/* Pair i1-j2 (tail) */
1686 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1689 /* REACTION-FIELD ELECTROSTATICS */
1690 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1692 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1696 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1698 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1700 /* Update vectorial force */
1701 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1702 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1703 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1705 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1706 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1707 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1711 /**************************
1712 * CALCULATE INTERACTIONS *
1713 **************************/
/* Pair i2-j0 (tail) */
1715 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1718 /* REACTION-FIELD ELECTROSTATICS */
1719 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1721 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1725 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1727 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1729 /* Update vectorial force */
1730 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1731 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1732 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1734 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1735 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1736 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1740 /**************************
1741 * CALCULATE INTERACTIONS *
1742 **************************/
/* Pair i2-j1 (tail) */
1744 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1747 /* REACTION-FIELD ELECTROSTATICS */
1748 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1750 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1754 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1756 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1758 /* Update vectorial force */
1759 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1760 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1761 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1763 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1764 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1765 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1769 /**************************
1770 * CALCULATE INTERACTIONS *
1771 **************************/
/* Pair i2-j2 (tail) */
1773 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1776 /* REACTION-FIELD ELECTROSTATICS */
1777 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1779 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1783 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1785 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1787 /* Update vectorial force */
1788 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1789 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1790 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1792 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1793 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1794 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
/* Hand the accumulated j forces for the single remaining entry to the one-pointer decrement helper */
1798 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1800 /* Inner loop uses 297 flops */
1803 /* End of innermost loop */
/* Pass the i-force accumulators to the update helper together with the f and fshift destinations for this i water */
1805 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1806 f+i_coord_offset,fshift+i_shift_offset);
1808 /* Increment number of inner iterations */
/* Counts j entries (not loop passes): the full length of this i entry's j range is added */
1809 inneriter += j_index_end - j_index_start;
1811 /* Outer loop uses 18 flops */
1814 /* Increment number of outer iterations */
1817 /* Update outer/inner flops */
/* Flop estimate reported to nrnb: 18 per outer iteration plus 297 per inner iteration */
1819 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*297);