2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: ReactionField
54 * VdW interaction: None
55 * Geometry: Water4-Water4
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the four positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
86 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87 int vdwjidx1A,vdwjidx1B;
88 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89 int vdwjidx2A,vdwjidx2B;
90 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91 int vdwjidx3A,vdwjidx3B;
92 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
93 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
94 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
95 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
96 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
97 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
98 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
99 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
100 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
101 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 dummy_mask,cutoff_mask;
106 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
107 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
108 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115 jindex = nlist->jindex;
117 shiftidx = nlist->shift;
119 shiftvec = fr->shift_vec[0];
120 fshift = fr->fshift[0];
121 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
122 charge = mdatoms->chargeA;
123 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
124 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
125 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
127 /* Setup water-specific parameters */
128 inr = nlist->iinr[0];
129 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
130 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
131 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
133 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
134 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
135 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
136 qq11 = _fjsp_mul_v2r8(iq1,jq1);
137 qq12 = _fjsp_mul_v2r8(iq1,jq2);
138 qq13 = _fjsp_mul_v2r8(iq1,jq3);
139 qq21 = _fjsp_mul_v2r8(iq2,jq1);
140 qq22 = _fjsp_mul_v2r8(iq2,jq2);
141 qq23 = _fjsp_mul_v2r8(iq2,jq3);
142 qq31 = _fjsp_mul_v2r8(iq3,jq1);
143 qq32 = _fjsp_mul_v2r8(iq3,jq2);
144 qq33 = _fjsp_mul_v2r8(iq3,jq3);
146 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
147 rcutoff_scalar = fr->rcoulomb;
148 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
149 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
151 /* Avoid stupid compiler warnings */
159 /* Start outer loop over neighborlists */
160 for(iidx=0; iidx<nri; iidx++)
162 /* Load shift vector for this list */
163 i_shift_offset = DIM*shiftidx[iidx];
165 /* Load limits for loop over neighbors */
166 j_index_start = jindex[iidx];
167 j_index_end = jindex[iidx+1];
169 /* Get outer coordinate index */
171 i_coord_offset = DIM*inr;
173 /* Load i particle coords and add shift vector */
174 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
175 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
177 fix1 = _fjsp_setzero_v2r8();
178 fiy1 = _fjsp_setzero_v2r8();
179 fiz1 = _fjsp_setzero_v2r8();
180 fix2 = _fjsp_setzero_v2r8();
181 fiy2 = _fjsp_setzero_v2r8();
182 fiz2 = _fjsp_setzero_v2r8();
183 fix3 = _fjsp_setzero_v2r8();
184 fiy3 = _fjsp_setzero_v2r8();
185 fiz3 = _fjsp_setzero_v2r8();
187 /* Reset potential sums */
188 velecsum = _fjsp_setzero_v2r8();
190 /* Start inner kernel loop */
191 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
194 /* Get j neighbor index, and coordinate index */
197 j_coord_offsetA = DIM*jnrA;
198 j_coord_offsetB = DIM*jnrB;
200 /* load j atom coordinates */
201 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
202 &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
204 /* Calculate displacement vector */
205 dx11 = _fjsp_sub_v2r8(ix1,jx1);
206 dy11 = _fjsp_sub_v2r8(iy1,jy1);
207 dz11 = _fjsp_sub_v2r8(iz1,jz1);
208 dx12 = _fjsp_sub_v2r8(ix1,jx2);
209 dy12 = _fjsp_sub_v2r8(iy1,jy2);
210 dz12 = _fjsp_sub_v2r8(iz1,jz2);
211 dx13 = _fjsp_sub_v2r8(ix1,jx3);
212 dy13 = _fjsp_sub_v2r8(iy1,jy3);
213 dz13 = _fjsp_sub_v2r8(iz1,jz3);
214 dx21 = _fjsp_sub_v2r8(ix2,jx1);
215 dy21 = _fjsp_sub_v2r8(iy2,jy1);
216 dz21 = _fjsp_sub_v2r8(iz2,jz1);
217 dx22 = _fjsp_sub_v2r8(ix2,jx2);
218 dy22 = _fjsp_sub_v2r8(iy2,jy2);
219 dz22 = _fjsp_sub_v2r8(iz2,jz2);
220 dx23 = _fjsp_sub_v2r8(ix2,jx3);
221 dy23 = _fjsp_sub_v2r8(iy2,jy3);
222 dz23 = _fjsp_sub_v2r8(iz2,jz3);
223 dx31 = _fjsp_sub_v2r8(ix3,jx1);
224 dy31 = _fjsp_sub_v2r8(iy3,jy1);
225 dz31 = _fjsp_sub_v2r8(iz3,jz1);
226 dx32 = _fjsp_sub_v2r8(ix3,jx2);
227 dy32 = _fjsp_sub_v2r8(iy3,jy2);
228 dz32 = _fjsp_sub_v2r8(iz3,jz2);
229 dx33 = _fjsp_sub_v2r8(ix3,jx3);
230 dy33 = _fjsp_sub_v2r8(iy3,jy3);
231 dz33 = _fjsp_sub_v2r8(iz3,jz3);
233 /* Calculate squared distance and things based on it */
234 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
235 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
236 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
237 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
238 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
239 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
240 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
241 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
242 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
244 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
245 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
246 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
247 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
248 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
249 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
250 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
251 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
252 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
254 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
255 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
256 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
257 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
258 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
259 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
260 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
261 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
262 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
264 fjx1 = _fjsp_setzero_v2r8();
265 fjy1 = _fjsp_setzero_v2r8();
266 fjz1 = _fjsp_setzero_v2r8();
267 fjx2 = _fjsp_setzero_v2r8();
268 fjy2 = _fjsp_setzero_v2r8();
269 fjz2 = _fjsp_setzero_v2r8();
270 fjx3 = _fjsp_setzero_v2r8();
271 fjy3 = _fjsp_setzero_v2r8();
272 fjz3 = _fjsp_setzero_v2r8();
274 /**************************
275 * CALCULATE INTERACTIONS *
276 **************************/
278 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
281 /* REACTION-FIELD ELECTROSTATICS */
282 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
283 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
285 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
287 /* Update potential sum for this i atom from the interaction with this j atom. */
288 velec = _fjsp_and_v2r8(velec,cutoff_mask);
289 velecsum = _fjsp_add_v2r8(velecsum,velec);
293 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
295 /* Update vectorial force */
296 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
297 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
298 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
300 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
301 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
302 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
306 /**************************
307 * CALCULATE INTERACTIONS *
308 **************************/
310 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
313 /* REACTION-FIELD ELECTROSTATICS */
314 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
315 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
317 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
319 /* Update potential sum for this i atom from the interaction with this j atom. */
320 velec = _fjsp_and_v2r8(velec,cutoff_mask);
321 velecsum = _fjsp_add_v2r8(velecsum,velec);
325 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
327 /* Update vectorial force */
328 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
329 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
330 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
332 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
333 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
334 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
338 /**************************
339 * CALCULATE INTERACTIONS *
340 **************************/
342 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
345 /* REACTION-FIELD ELECTROSTATICS */
346 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
347 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
349 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
351 /* Update potential sum for this i atom from the interaction with this j atom. */
352 velec = _fjsp_and_v2r8(velec,cutoff_mask);
353 velecsum = _fjsp_add_v2r8(velecsum,velec);
357 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
359 /* Update vectorial force */
360 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
361 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
362 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
364 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
365 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
366 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
370 /**************************
371 * CALCULATE INTERACTIONS *
372 **************************/
374 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
377 /* REACTION-FIELD ELECTROSTATICS */
378 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
379 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
381 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
383 /* Update potential sum for this i atom from the interaction with this j atom. */
384 velec = _fjsp_and_v2r8(velec,cutoff_mask);
385 velecsum = _fjsp_add_v2r8(velecsum,velec);
389 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
391 /* Update vectorial force */
392 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
393 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
394 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
396 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
397 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
398 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
402 /**************************
403 * CALCULATE INTERACTIONS *
404 **************************/
406 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
409 /* REACTION-FIELD ELECTROSTATICS */
410 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
411 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
413 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
415 /* Update potential sum for this i atom from the interaction with this j atom. */
416 velec = _fjsp_and_v2r8(velec,cutoff_mask);
417 velecsum = _fjsp_add_v2r8(velecsum,velec);
421 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
423 /* Update vectorial force */
424 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
425 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
426 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
428 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
429 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
430 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
434 /**************************
435 * CALCULATE INTERACTIONS *
436 **************************/
438 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
441 /* REACTION-FIELD ELECTROSTATICS */
442 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
443 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
445 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
447 /* Update potential sum for this i atom from the interaction with this j atom. */
448 velec = _fjsp_and_v2r8(velec,cutoff_mask);
449 velecsum = _fjsp_add_v2r8(velecsum,velec);
453 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
455 /* Update vectorial force */
456 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
457 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
458 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
460 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
461 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
462 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
466 /**************************
467 * CALCULATE INTERACTIONS *
468 **************************/
470 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
473 /* REACTION-FIELD ELECTROSTATICS */
474 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
475 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
477 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
479 /* Update potential sum for this i atom from the interaction with this j atom. */
480 velec = _fjsp_and_v2r8(velec,cutoff_mask);
481 velecsum = _fjsp_add_v2r8(velecsum,velec);
485 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
487 /* Update vectorial force */
488 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
489 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
490 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
492 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
493 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
494 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
498 /**************************
499 * CALCULATE INTERACTIONS *
500 **************************/
502 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
505 /* REACTION-FIELD ELECTROSTATICS */
506 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
507 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
509 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
511 /* Update potential sum for this i atom from the interaction with this j atom. */
512 velec = _fjsp_and_v2r8(velec,cutoff_mask);
513 velecsum = _fjsp_add_v2r8(velecsum,velec);
517 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
519 /* Update vectorial force */
520 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
521 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
522 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
524 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
525 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
526 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
530 /**************************
531 * CALCULATE INTERACTIONS *
532 **************************/
534 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
537 /* REACTION-FIELD ELECTROSTATICS */
538 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
539 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
541 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
543 /* Update potential sum for this i atom from the interaction with this j atom. */
544 velec = _fjsp_and_v2r8(velec,cutoff_mask);
545 velecsum = _fjsp_add_v2r8(velecsum,velec);
549 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
551 /* Update vectorial force */
552 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
553 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
554 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
556 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
557 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
558 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
562 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
564 /* Inner loop uses 351 flops */
571 j_coord_offsetA = DIM*jnrA;
573 /* load j atom coordinates */
574 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
575 &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
577 /* Calculate displacement vector */
578 dx11 = _fjsp_sub_v2r8(ix1,jx1);
579 dy11 = _fjsp_sub_v2r8(iy1,jy1);
580 dz11 = _fjsp_sub_v2r8(iz1,jz1);
581 dx12 = _fjsp_sub_v2r8(ix1,jx2);
582 dy12 = _fjsp_sub_v2r8(iy1,jy2);
583 dz12 = _fjsp_sub_v2r8(iz1,jz2);
584 dx13 = _fjsp_sub_v2r8(ix1,jx3);
585 dy13 = _fjsp_sub_v2r8(iy1,jy3);
586 dz13 = _fjsp_sub_v2r8(iz1,jz3);
587 dx21 = _fjsp_sub_v2r8(ix2,jx1);
588 dy21 = _fjsp_sub_v2r8(iy2,jy1);
589 dz21 = _fjsp_sub_v2r8(iz2,jz1);
590 dx22 = _fjsp_sub_v2r8(ix2,jx2);
591 dy22 = _fjsp_sub_v2r8(iy2,jy2);
592 dz22 = _fjsp_sub_v2r8(iz2,jz2);
593 dx23 = _fjsp_sub_v2r8(ix2,jx3);
594 dy23 = _fjsp_sub_v2r8(iy2,jy3);
595 dz23 = _fjsp_sub_v2r8(iz2,jz3);
596 dx31 = _fjsp_sub_v2r8(ix3,jx1);
597 dy31 = _fjsp_sub_v2r8(iy3,jy1);
598 dz31 = _fjsp_sub_v2r8(iz3,jz1);
599 dx32 = _fjsp_sub_v2r8(ix3,jx2);
600 dy32 = _fjsp_sub_v2r8(iy3,jy2);
601 dz32 = _fjsp_sub_v2r8(iz3,jz2);
602 dx33 = _fjsp_sub_v2r8(ix3,jx3);
603 dy33 = _fjsp_sub_v2r8(iy3,jy3);
604 dz33 = _fjsp_sub_v2r8(iz3,jz3);
606 /* Calculate squared distance and things based on it */
607 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
608 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
609 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
610 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
611 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
612 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
613 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
614 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
615 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
617 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
618 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
619 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
620 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
621 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
622 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
623 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
624 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
625 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
627 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
628 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
629 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
630 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
631 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
632 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
633 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
634 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
635 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
637 fjx1 = _fjsp_setzero_v2r8();
638 fjy1 = _fjsp_setzero_v2r8();
639 fjz1 = _fjsp_setzero_v2r8();
640 fjx2 = _fjsp_setzero_v2r8();
641 fjy2 = _fjsp_setzero_v2r8();
642 fjz2 = _fjsp_setzero_v2r8();
643 fjx3 = _fjsp_setzero_v2r8();
644 fjy3 = _fjsp_setzero_v2r8();
645 fjz3 = _fjsp_setzero_v2r8();
647 /**************************
648 * CALCULATE INTERACTIONS *
649 **************************/
651 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
654 /* REACTION-FIELD ELECTROSTATICS */
655 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
656 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
658 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
660 /* Update potential sum for this i atom from the interaction with this j atom. */
661 velec = _fjsp_and_v2r8(velec,cutoff_mask);
662 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
663 velecsum = _fjsp_add_v2r8(velecsum,velec);
667 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
669 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
671 /* Update vectorial force */
672 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
673 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
674 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
676 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
677 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
678 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
682 /**************************
683 * CALCULATE INTERACTIONS *
684 **************************/
686 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
689 /* REACTION-FIELD ELECTROSTATICS */
690 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
691 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
693 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
695 /* Update potential sum for this i atom from the interaction with this j atom. */
696 velec = _fjsp_and_v2r8(velec,cutoff_mask);
697 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
698 velecsum = _fjsp_add_v2r8(velecsum,velec);
702 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
704 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
706 /* Update vectorial force */
707 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
708 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
709 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
711 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
712 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
713 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
717 /**************************
718 * CALCULATE INTERACTIONS *
719 **************************/
721 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
724 /* REACTION-FIELD ELECTROSTATICS */
725 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
726 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
728 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
730 /* Update potential sum for this i atom from the interaction with this j atom. */
731 velec = _fjsp_and_v2r8(velec,cutoff_mask);
732 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
733 velecsum = _fjsp_add_v2r8(velecsum,velec);
737 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
739 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
741 /* Update vectorial force */
742 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
743 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
744 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
746 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
747 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
748 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
752 /**************************
753 * CALCULATE INTERACTIONS *
754 **************************/
756 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
759 /* REACTION-FIELD ELECTROSTATICS */
760 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
761 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
763 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
765 /* Update potential sum for this i atom from the interaction with this j atom. */
766 velec = _fjsp_and_v2r8(velec,cutoff_mask);
767 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
768 velecsum = _fjsp_add_v2r8(velecsum,velec);
772 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
774 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
776 /* Update vectorial force */
777 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
778 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
779 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
781 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
782 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
783 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
787 /**************************
788 * CALCULATE INTERACTIONS *
789 **************************/
791 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
794 /* REACTION-FIELD ELECTROSTATICS */
795 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
796 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
798 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
800 /* Update potential sum for this i atom from the interaction with this j atom. */
801 velec = _fjsp_and_v2r8(velec,cutoff_mask);
802 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
803 velecsum = _fjsp_add_v2r8(velecsum,velec);
807 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
809 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
811 /* Update vectorial force */
812 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
813 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
814 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
816 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
817 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
818 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
822 /**************************
823 * CALCULATE INTERACTIONS *
824 **************************/
826 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
829 /* REACTION-FIELD ELECTROSTATICS */
830 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
831 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
833 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
835 /* Update potential sum for this i atom from the interaction with this j atom. */
836 velec = _fjsp_and_v2r8(velec,cutoff_mask);
837 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
838 velecsum = _fjsp_add_v2r8(velecsum,velec);
842 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
844 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
846 /* Update vectorial force */
847 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
848 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
849 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
851 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
852 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
853 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
857 /**************************
858 * CALCULATE INTERACTIONS *
859 **************************/
861 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
864 /* REACTION-FIELD ELECTROSTATICS */
865 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
866 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
868 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
870 /* Update potential sum for this i atom from the interaction with this j atom. */
871 velec = _fjsp_and_v2r8(velec,cutoff_mask);
872 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
873 velecsum = _fjsp_add_v2r8(velecsum,velec);
877 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
879 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
881 /* Update vectorial force */
882 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
883 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
884 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
886 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
887 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
888 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
892 /**************************
893 * CALCULATE INTERACTIONS *
894 **************************/
896 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
899 /* REACTION-FIELD ELECTROSTATICS */
900 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
901 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
903 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
905 /* Update potential sum for this i atom from the interaction with this j atom. */
906 velec = _fjsp_and_v2r8(velec,cutoff_mask);
907 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
908 velecsum = _fjsp_add_v2r8(velecsum,velec);
912 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
914 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
916 /* Update vectorial force */
917 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
918 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
919 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
921 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
922 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
923 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
927 /**************************
928 * CALCULATE INTERACTIONS *
929 **************************/
931 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
934 /* REACTION-FIELD ELECTROSTATICS */
935 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
936 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
938 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
940 /* Update potential sum for this i atom from the interaction with this j atom. */
941 velec = _fjsp_and_v2r8(velec,cutoff_mask);
942 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
943 velecsum = _fjsp_add_v2r8(velecsum,velec);
947 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
949 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
951 /* Update vectorial force */
952 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
953 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
954 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
956 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
957 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
958 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
962 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
964 /* Inner loop uses 351 flops */
967 /* End of innermost loop */
969 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
970 f+i_coord_offset+DIM,fshift+i_shift_offset);
973 /* Update potential energies */
974 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
976 /* Increment number of inner iterations */
977 inneriter += j_index_end - j_index_start;
979 /* Outer loop uses 19 flops */
982 /* Increment number of outer iterations */
985 /* Update outer/inner flops */
987 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*351);
990 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
991 * Electrostatics interaction: ReactionField
992 * VdW interaction: None
993 * Geometry: Water4-Water4
994 * Calculate force/pot: Force
997 nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
998 (t_nblist * gmx_restrict nlist,
999 rvec * gmx_restrict xx,
1000 rvec * gmx_restrict ff,
1001 t_forcerec * gmx_restrict fr,
1002 t_mdatoms * gmx_restrict mdatoms,
1003 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1004 t_nrnb * gmx_restrict nrnb)
1006 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1007 * just 0 for non-waters.
1008 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1009 * jnr indices corresponding to data put in the four positions in the SIMD register.
1011 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1012 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1014 int j_coord_offsetA,j_coord_offsetB;
1015 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1016 real rcutoff_scalar;
1017 real *shiftvec,*fshift,*x,*f;
1018 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1020 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1022 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1024 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1025 int vdwjidx1A,vdwjidx1B;
1026 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1027 int vdwjidx2A,vdwjidx2B;
1028 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1029 int vdwjidx3A,vdwjidx3B;
1030 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1031 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1032 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1033 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1034 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1035 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1036 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1037 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1038 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1039 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1040 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1042 _fjsp_v2r8 itab_tmp;
1043 _fjsp_v2r8 dummy_mask,cutoff_mask;
1044 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1045 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1046 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1053 jindex = nlist->jindex;
1055 shiftidx = nlist->shift;
1057 shiftvec = fr->shift_vec[0];
1058 fshift = fr->fshift[0];
1059 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1060 charge = mdatoms->chargeA;
1061 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1062 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1063 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1065 /* Setup water-specific parameters */
1066 inr = nlist->iinr[0];
1067 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1068 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1069 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1071 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1072 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1073 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
1074 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1075 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1076 qq13 = _fjsp_mul_v2r8(iq1,jq3);
1077 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1078 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1079 qq23 = _fjsp_mul_v2r8(iq2,jq3);
1080 qq31 = _fjsp_mul_v2r8(iq3,jq1);
1081 qq32 = _fjsp_mul_v2r8(iq3,jq2);
1082 qq33 = _fjsp_mul_v2r8(iq3,jq3);
1084 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1085 rcutoff_scalar = fr->rcoulomb;
1086 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1087 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1089 /* Initialize to zero to avoid compiler warnings */
1091 j_coord_offsetA = 0;
1092 j_coord_offsetB = 0;
1097 /* Start outer loop over neighborlists */
1098 for(iidx=0; iidx<nri; iidx++)
1100 /* Load shift vector for this list */
1101 i_shift_offset = DIM*shiftidx[iidx];
1103 /* Load limits for loop over neighbors */
1104 j_index_start = jindex[iidx];
1105 j_index_end = jindex[iidx+1];
1107 /* Get outer coordinate index */
1109 i_coord_offset = DIM*inr;
1111 /* Load i particle coords and add shift vector */
1112 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
1113 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1115 fix1 = _fjsp_setzero_v2r8();
1116 fiy1 = _fjsp_setzero_v2r8();
1117 fiz1 = _fjsp_setzero_v2r8();
1118 fix2 = _fjsp_setzero_v2r8();
1119 fiy2 = _fjsp_setzero_v2r8();
1120 fiz2 = _fjsp_setzero_v2r8();
1121 fix3 = _fjsp_setzero_v2r8();
1122 fiy3 = _fjsp_setzero_v2r8();
1123 fiz3 = _fjsp_setzero_v2r8();
1125 /* Start inner kernel loop */
1126 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1129 /* Get j neighbor index, and coordinate index */
1131 jnrB = jjnr[jidx+1];
1132 j_coord_offsetA = DIM*jnrA;
1133 j_coord_offsetB = DIM*jnrB;
1135 /* load j atom coordinates */
1136 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
1137 &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1139 /* Calculate displacement vector */
1140 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1141 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1142 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1143 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1144 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1145 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1146 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1147 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1148 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1149 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1150 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1151 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1152 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1153 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1154 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1155 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1156 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1157 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1158 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1159 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1160 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1161 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1162 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1163 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1164 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1165 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1166 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1168 /* Calculate squared distance and things based on it */
1169 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1170 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1171 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1172 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1173 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1174 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1175 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1176 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1177 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1179 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1180 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1181 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1182 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1183 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1184 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1185 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1186 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1187 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1189 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1190 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1191 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1192 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1193 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1194 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1195 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1196 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1197 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1199 fjx1 = _fjsp_setzero_v2r8();
1200 fjy1 = _fjsp_setzero_v2r8();
1201 fjz1 = _fjsp_setzero_v2r8();
1202 fjx2 = _fjsp_setzero_v2r8();
1203 fjy2 = _fjsp_setzero_v2r8();
1204 fjz2 = _fjsp_setzero_v2r8();
1205 fjx3 = _fjsp_setzero_v2r8();
1206 fjy3 = _fjsp_setzero_v2r8();
1207 fjz3 = _fjsp_setzero_v2r8();
1209 /**************************
1210 * CALCULATE INTERACTIONS *
1211 **************************/
1213 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1216 /* REACTION-FIELD ELECTROSTATICS */
1217 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1219 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1223 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1225 /* Update vectorial force */
1226 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1227 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1228 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1230 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1231 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1232 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1236 /**************************
1237 * CALCULATE INTERACTIONS *
1238 **************************/
1240 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1243 /* REACTION-FIELD ELECTROSTATICS */
1244 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1246 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1250 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1252 /* Update vectorial force */
1253 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1254 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1255 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1257 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1258 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1259 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1263 /**************************
1264 * CALCULATE INTERACTIONS *
1265 **************************/
1267 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1270 /* REACTION-FIELD ELECTROSTATICS */
1271 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1273 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1277 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1279 /* Update vectorial force */
1280 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1281 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1282 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1284 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1285 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1286 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1290 /**************************
1291 * CALCULATE INTERACTIONS *
1292 **************************/
1294 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1297 /* REACTION-FIELD ELECTROSTATICS */
1298 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1300 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1304 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1306 /* Update vectorial force */
1307 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1308 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1309 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1311 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1312 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1313 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1317 /**************************
1318 * CALCULATE INTERACTIONS *
1319 **************************/
1321 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1324 /* REACTION-FIELD ELECTROSTATICS */
1325 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1327 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1331 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1333 /* Update vectorial force */
1334 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1335 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1336 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1338 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1339 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1340 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1344 /**************************
1345 * CALCULATE INTERACTIONS *
1346 **************************/
1348 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1351 /* REACTION-FIELD ELECTROSTATICS */
1352 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1354 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1358 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1360 /* Update vectorial force */
1361 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1362 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1363 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1365 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1366 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1367 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1371 /**************************
1372 * CALCULATE INTERACTIONS *
1373 **************************/
1375 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1378 /* REACTION-FIELD ELECTROSTATICS */
1379 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1381 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1385 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1387 /* Update vectorial force */
1388 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1389 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1390 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1392 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1393 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1394 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1398 /**************************
1399 * CALCULATE INTERACTIONS *
1400 **************************/
1402 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1405 /* REACTION-FIELD ELECTROSTATICS */
1406 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1408 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1412 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1414 /* Update vectorial force */
1415 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1416 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1417 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1419 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1420 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1421 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1425 /**************************
1426 * CALCULATE INTERACTIONS *
1427 **************************/
1429 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1432 /* REACTION-FIELD ELECTROSTATICS */
1433 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1435 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1439 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1441 /* Update vectorial force */
1442 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1443 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1444 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1446 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1447 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1448 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1452 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1454 /* Inner loop uses 297 flops */
1457 if(jidx<j_index_end)
1461 j_coord_offsetA = DIM*jnrA;
1463 /* load j atom coordinates */
1464 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
1465 &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1467 /* Calculate displacement vector */
1468 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1469 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1470 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1471 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1472 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1473 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1474 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1475 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1476 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1477 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1478 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1479 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1480 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1481 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1482 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1483 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1484 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1485 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1486 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1487 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1488 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1489 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1490 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1491 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1492 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1493 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1494 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1496 /* Calculate squared distance and things based on it */
1497 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1498 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1499 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1500 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1501 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1502 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1503 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1504 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1505 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1507 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1508 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1509 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1510 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1511 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1512 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1513 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1514 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1515 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1517 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1518 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1519 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1520 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1521 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1522 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1523 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1524 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1525 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1527 fjx1 = _fjsp_setzero_v2r8();
1528 fjy1 = _fjsp_setzero_v2r8();
1529 fjz1 = _fjsp_setzero_v2r8();
1530 fjx2 = _fjsp_setzero_v2r8();
1531 fjy2 = _fjsp_setzero_v2r8();
1532 fjz2 = _fjsp_setzero_v2r8();
1533 fjx3 = _fjsp_setzero_v2r8();
1534 fjy3 = _fjsp_setzero_v2r8();
1535 fjz3 = _fjsp_setzero_v2r8();
1537 /**************************
1538 * CALCULATE INTERACTIONS *
1539 **************************/
1541 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1544 /* REACTION-FIELD ELECTROSTATICS */
1545 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1547 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1551 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1553 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1555 /* Update vectorial force */
1556 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1557 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1558 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1560 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1561 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1562 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1566 /**************************
1567 * CALCULATE INTERACTIONS *
1568 **************************/
1570 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1573 /* REACTION-FIELD ELECTROSTATICS */
1574 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1576 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1580 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1582 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1584 /* Update vectorial force */
1585 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1586 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1587 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1589 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1590 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1591 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1595 /**************************
1596 * CALCULATE INTERACTIONS *
1597 **************************/
1599 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1602 /* REACTION-FIELD ELECTROSTATICS */
1603 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1605 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1609 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1611 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1613 /* Update vectorial force */
1614 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1615 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1616 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1618 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1619 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1620 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1624 /**************************
1625 * CALCULATE INTERACTIONS *
1626 **************************/
1628 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1631 /* REACTION-FIELD ELECTROSTATICS */
1632 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1634 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1638 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1640 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1642 /* Update vectorial force */
1643 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1644 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1645 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1647 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1648 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1649 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1653 /**************************
1654 * CALCULATE INTERACTIONS *
1655 **************************/
1657 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1660 /* REACTION-FIELD ELECTROSTATICS */
1661 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1663 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1667 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1669 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1671 /* Update vectorial force */
1672 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1673 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1674 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1676 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1677 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1678 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1682 /**************************
1683 * CALCULATE INTERACTIONS *
1684 **************************/
1686 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1689 /* REACTION-FIELD ELECTROSTATICS */
1690 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1692 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1696 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1698 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1700 /* Update vectorial force */
1701 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1702 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1703 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1705 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1706 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1707 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1711 /**************************
1712 * CALCULATE INTERACTIONS *
1713 **************************/
1715 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1718 /* REACTION-FIELD ELECTROSTATICS */
1719 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1721 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1725 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1727 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1729 /* Update vectorial force */
1730 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1731 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1732 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1734 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1735 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1736 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1740 /**************************
1741 * CALCULATE INTERACTIONS *
1742 **************************/
1744 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1747 /* REACTION-FIELD ELECTROSTATICS */
1748 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1750 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1754 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1756 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1758 /* Update vectorial force */
1759 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1760 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1761 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1763 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1764 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1765 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1769 /**************************
1770 * CALCULATE INTERACTIONS *
1771 **************************/
1773 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1776 /* REACTION-FIELD ELECTROSTATICS */
1777 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1779 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1783 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1785 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1787 /* Update vectorial force */
1788 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1789 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1790 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1792 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1793 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1794 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1798 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1800 /* Inner loop uses 297 flops */
1803 /* End of innermost loop */
1805 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1806 f+i_coord_offset+DIM,fshift+i_shift_offset);
1808 /* Increment number of inner iterations */
1809 inneriter += j_index_end - j_index_start;
1811 /* Outer loop uses 18 flops */
1814 /* Increment number of outer iterations */
1817 /* Update outer/inner flops */
1819 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*297);