2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: ReactionField
54 * VdW interaction: LennardJones
55 * Geometry: Water4-Water4
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the two positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
88 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
89 int vdwjidx0A,vdwjidx0B;
90 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
91 int vdwjidx1A,vdwjidx1B;
92 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
93 int vdwjidx2A,vdwjidx2B;
94 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
95 int vdwjidx3A,vdwjidx3B;
96 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
97 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
101 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
104 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
105 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
106 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
107 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
110 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
113 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
114 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
116 _fjsp_v2r8 dummy_mask,cutoff_mask;
117 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
118 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
119 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
126 jindex = nlist->jindex;
128 shiftidx = nlist->shift;
130 shiftvec = fr->shift_vec[0];
131 fshift = fr->fshift[0];
132 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
133 charge = mdatoms->chargeA;
134 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
135 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
136 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
137 nvdwtype = fr->ntype;
139 vdwtype = mdatoms->typeA;
141 /* Setup water-specific parameters */
142 inr = nlist->iinr[0];
143 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
144 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
145 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
146 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
148 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
149 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
150 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
151 vdwjidx0A = 2*vdwtype[inr+0];
152 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
153 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
154 qq11 = _fjsp_mul_v2r8(iq1,jq1);
155 qq12 = _fjsp_mul_v2r8(iq1,jq2);
156 qq13 = _fjsp_mul_v2r8(iq1,jq3);
157 qq21 = _fjsp_mul_v2r8(iq2,jq1);
158 qq22 = _fjsp_mul_v2r8(iq2,jq2);
159 qq23 = _fjsp_mul_v2r8(iq2,jq3);
160 qq31 = _fjsp_mul_v2r8(iq3,jq1);
161 qq32 = _fjsp_mul_v2r8(iq3,jq2);
162 qq33 = _fjsp_mul_v2r8(iq3,jq3);
164 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
165 rcutoff_scalar = fr->rcoulomb;
166 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
167 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
169 sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
170 rvdw = gmx_fjsp_set1_v2r8(fr->rvdw);
172 /* Avoid stupid compiler warnings */
180 /* Start outer loop over neighborlists */
181 for(iidx=0; iidx<nri; iidx++)
183 /* Load shift vector for this list */
184 i_shift_offset = DIM*shiftidx[iidx];
186 /* Load limits for loop over neighbors */
187 j_index_start = jindex[iidx];
188 j_index_end = jindex[iidx+1];
190 /* Get outer coordinate index */
192 i_coord_offset = DIM*inr;
194 /* Load i particle coords and add shift vector */
195 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
196 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
198 fix0 = _fjsp_setzero_v2r8();
199 fiy0 = _fjsp_setzero_v2r8();
200 fiz0 = _fjsp_setzero_v2r8();
201 fix1 = _fjsp_setzero_v2r8();
202 fiy1 = _fjsp_setzero_v2r8();
203 fiz1 = _fjsp_setzero_v2r8();
204 fix2 = _fjsp_setzero_v2r8();
205 fiy2 = _fjsp_setzero_v2r8();
206 fiz2 = _fjsp_setzero_v2r8();
207 fix3 = _fjsp_setzero_v2r8();
208 fiy3 = _fjsp_setzero_v2r8();
209 fiz3 = _fjsp_setzero_v2r8();
211 /* Reset potential sums */
212 velecsum = _fjsp_setzero_v2r8();
213 vvdwsum = _fjsp_setzero_v2r8();
215 /* Start inner kernel loop */
216 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
219 /* Get j neighbor index, and coordinate index */
222 j_coord_offsetA = DIM*jnrA;
223 j_coord_offsetB = DIM*jnrB;
225 /* load j atom coordinates */
226 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
227 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
228 &jy2,&jz2,&jx3,&jy3,&jz3);
230 /* Calculate displacement vector */
231 dx00 = _fjsp_sub_v2r8(ix0,jx0);
232 dy00 = _fjsp_sub_v2r8(iy0,jy0);
233 dz00 = _fjsp_sub_v2r8(iz0,jz0);
234 dx11 = _fjsp_sub_v2r8(ix1,jx1);
235 dy11 = _fjsp_sub_v2r8(iy1,jy1);
236 dz11 = _fjsp_sub_v2r8(iz1,jz1);
237 dx12 = _fjsp_sub_v2r8(ix1,jx2);
238 dy12 = _fjsp_sub_v2r8(iy1,jy2);
239 dz12 = _fjsp_sub_v2r8(iz1,jz2);
240 dx13 = _fjsp_sub_v2r8(ix1,jx3);
241 dy13 = _fjsp_sub_v2r8(iy1,jy3);
242 dz13 = _fjsp_sub_v2r8(iz1,jz3);
243 dx21 = _fjsp_sub_v2r8(ix2,jx1);
244 dy21 = _fjsp_sub_v2r8(iy2,jy1);
245 dz21 = _fjsp_sub_v2r8(iz2,jz1);
246 dx22 = _fjsp_sub_v2r8(ix2,jx2);
247 dy22 = _fjsp_sub_v2r8(iy2,jy2);
248 dz22 = _fjsp_sub_v2r8(iz2,jz2);
249 dx23 = _fjsp_sub_v2r8(ix2,jx3);
250 dy23 = _fjsp_sub_v2r8(iy2,jy3);
251 dz23 = _fjsp_sub_v2r8(iz2,jz3);
252 dx31 = _fjsp_sub_v2r8(ix3,jx1);
253 dy31 = _fjsp_sub_v2r8(iy3,jy1);
254 dz31 = _fjsp_sub_v2r8(iz3,jz1);
255 dx32 = _fjsp_sub_v2r8(ix3,jx2);
256 dy32 = _fjsp_sub_v2r8(iy3,jy2);
257 dz32 = _fjsp_sub_v2r8(iz3,jz2);
258 dx33 = _fjsp_sub_v2r8(ix3,jx3);
259 dy33 = _fjsp_sub_v2r8(iy3,jy3);
260 dz33 = _fjsp_sub_v2r8(iz3,jz3);
262 /* Calculate squared distance and things based on it */
263 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
264 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
265 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
266 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
267 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
268 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
269 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
270 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
271 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
272 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
274 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
275 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
276 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
277 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
278 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
279 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
280 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
281 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
282 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
284 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
285 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
286 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
287 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
288 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
289 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
290 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
291 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
292 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
293 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
295 fjx0 = _fjsp_setzero_v2r8();
296 fjy0 = _fjsp_setzero_v2r8();
297 fjz0 = _fjsp_setzero_v2r8();
298 fjx1 = _fjsp_setzero_v2r8();
299 fjy1 = _fjsp_setzero_v2r8();
300 fjz1 = _fjsp_setzero_v2r8();
301 fjx2 = _fjsp_setzero_v2r8();
302 fjy2 = _fjsp_setzero_v2r8();
303 fjz2 = _fjsp_setzero_v2r8();
304 fjx3 = _fjsp_setzero_v2r8();
305 fjy3 = _fjsp_setzero_v2r8();
306 fjz3 = _fjsp_setzero_v2r8();
308 /**************************
309 * CALCULATE INTERACTIONS *
310 **************************/
312 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
315 /* LENNARD-JONES DISPERSION/REPULSION */
317 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
318 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
319 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
320 vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
321 _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
322 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
324 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
326 /* Update potential sum for this i atom from the interaction with this j atom. */
327 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
328 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
332 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
334 /* Update vectorial force */
335 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
336 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
337 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
339 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
340 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
341 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
345 /**************************
346 * CALCULATE INTERACTIONS *
347 **************************/
349 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
352 /* REACTION-FIELD ELECTROSTATICS */
353 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
354 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
356 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
358 /* Update potential sum for this i atom from the interaction with this j atom. */
359 velec = _fjsp_and_v2r8(velec,cutoff_mask);
360 velecsum = _fjsp_add_v2r8(velecsum,velec);
364 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
366 /* Update vectorial force */
367 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
368 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
369 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
371 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
372 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
373 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
377 /**************************
378 * CALCULATE INTERACTIONS *
379 **************************/
381 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
384 /* REACTION-FIELD ELECTROSTATICS */
385 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
386 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
388 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
390 /* Update potential sum for this i atom from the interaction with this j atom. */
391 velec = _fjsp_and_v2r8(velec,cutoff_mask);
392 velecsum = _fjsp_add_v2r8(velecsum,velec);
396 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
398 /* Update vectorial force */
399 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
400 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
401 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
403 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
404 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
405 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
409 /**************************
410 * CALCULATE INTERACTIONS *
411 **************************/
413 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
416 /* REACTION-FIELD ELECTROSTATICS */
417 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
418 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
420 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
422 /* Update potential sum for this i atom from the interaction with this j atom. */
423 velec = _fjsp_and_v2r8(velec,cutoff_mask);
424 velecsum = _fjsp_add_v2r8(velecsum,velec);
428 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
430 /* Update vectorial force */
431 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
432 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
433 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
435 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
436 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
437 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
441 /**************************
442 * CALCULATE INTERACTIONS *
443 **************************/
445 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
448 /* REACTION-FIELD ELECTROSTATICS */
449 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
450 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
452 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
454 /* Update potential sum for this i atom from the interaction with this j atom. */
455 velec = _fjsp_and_v2r8(velec,cutoff_mask);
456 velecsum = _fjsp_add_v2r8(velecsum,velec);
460 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
462 /* Update vectorial force */
463 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
464 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
465 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
467 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
468 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
469 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
473 /**************************
474 * CALCULATE INTERACTIONS *
475 **************************/
477 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
480 /* REACTION-FIELD ELECTROSTATICS */
481 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
482 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
484 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
486 /* Update potential sum for this i atom from the interaction with this j atom. */
487 velec = _fjsp_and_v2r8(velec,cutoff_mask);
488 velecsum = _fjsp_add_v2r8(velecsum,velec);
492 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
494 /* Update vectorial force */
495 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
496 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
497 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
499 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
500 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
501 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
505 /**************************
506 * CALCULATE INTERACTIONS *
507 **************************/
509 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
512 /* REACTION-FIELD ELECTROSTATICS */
513 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
514 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
516 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
518 /* Update potential sum for this i atom from the interaction with this j atom. */
519 velec = _fjsp_and_v2r8(velec,cutoff_mask);
520 velecsum = _fjsp_add_v2r8(velecsum,velec);
524 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
526 /* Update vectorial force */
527 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
528 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
529 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
531 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
532 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
533 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
537 /**************************
538 * CALCULATE INTERACTIONS *
539 **************************/
541 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
544 /* REACTION-FIELD ELECTROSTATICS */
545 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
546 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
548 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
550 /* Update potential sum for this i atom from the interaction with this j atom. */
551 velec = _fjsp_and_v2r8(velec,cutoff_mask);
552 velecsum = _fjsp_add_v2r8(velecsum,velec);
556 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
558 /* Update vectorial force */
559 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
560 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
561 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
563 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
564 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
565 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
569 /**************************
570 * CALCULATE INTERACTIONS *
571 **************************/
573 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
576 /* REACTION-FIELD ELECTROSTATICS */
577 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
578 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
580 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
582 /* Update potential sum for this i atom from the interaction with this j atom. */
583 velec = _fjsp_and_v2r8(velec,cutoff_mask);
584 velecsum = _fjsp_add_v2r8(velecsum,velec);
588 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
590 /* Update vectorial force */
591 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
592 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
593 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
595 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
596 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
597 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
601 /**************************
602 * CALCULATE INTERACTIONS *
603 **************************/
605 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
608 /* REACTION-FIELD ELECTROSTATICS */
609 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
610 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
612 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
614 /* Update potential sum for this i atom from the interaction with this j atom. */
615 velec = _fjsp_and_v2r8(velec,cutoff_mask);
616 velecsum = _fjsp_add_v2r8(velecsum,velec);
620 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
622 /* Update vectorial force */
623 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
624 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
625 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
627 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
628 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
629 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
633 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
635 /* Inner loop uses 398 flops */
642 j_coord_offsetA = DIM*jnrA;
644 /* load j atom coordinates */
645 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
646 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
647 &jy2,&jz2,&jx3,&jy3,&jz3);
649 /* Calculate displacement vector */
650 dx00 = _fjsp_sub_v2r8(ix0,jx0);
651 dy00 = _fjsp_sub_v2r8(iy0,jy0);
652 dz00 = _fjsp_sub_v2r8(iz0,jz0);
653 dx11 = _fjsp_sub_v2r8(ix1,jx1);
654 dy11 = _fjsp_sub_v2r8(iy1,jy1);
655 dz11 = _fjsp_sub_v2r8(iz1,jz1);
656 dx12 = _fjsp_sub_v2r8(ix1,jx2);
657 dy12 = _fjsp_sub_v2r8(iy1,jy2);
658 dz12 = _fjsp_sub_v2r8(iz1,jz2);
659 dx13 = _fjsp_sub_v2r8(ix1,jx3);
660 dy13 = _fjsp_sub_v2r8(iy1,jy3);
661 dz13 = _fjsp_sub_v2r8(iz1,jz3);
662 dx21 = _fjsp_sub_v2r8(ix2,jx1);
663 dy21 = _fjsp_sub_v2r8(iy2,jy1);
664 dz21 = _fjsp_sub_v2r8(iz2,jz1);
665 dx22 = _fjsp_sub_v2r8(ix2,jx2);
666 dy22 = _fjsp_sub_v2r8(iy2,jy2);
667 dz22 = _fjsp_sub_v2r8(iz2,jz2);
668 dx23 = _fjsp_sub_v2r8(ix2,jx3);
669 dy23 = _fjsp_sub_v2r8(iy2,jy3);
670 dz23 = _fjsp_sub_v2r8(iz2,jz3);
671 dx31 = _fjsp_sub_v2r8(ix3,jx1);
672 dy31 = _fjsp_sub_v2r8(iy3,jy1);
673 dz31 = _fjsp_sub_v2r8(iz3,jz1);
674 dx32 = _fjsp_sub_v2r8(ix3,jx2);
675 dy32 = _fjsp_sub_v2r8(iy3,jy2);
676 dz32 = _fjsp_sub_v2r8(iz3,jz2);
677 dx33 = _fjsp_sub_v2r8(ix3,jx3);
678 dy33 = _fjsp_sub_v2r8(iy3,jy3);
679 dz33 = _fjsp_sub_v2r8(iz3,jz3);
681 /* Calculate squared distance and things based on it */
682 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
683 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
684 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
685 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
686 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
687 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
688 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
689 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
690 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
691 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
693 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
694 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
695 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
696 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
697 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
698 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
699 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
700 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
701 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
703 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
704 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
705 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
706 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
707 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
708 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
709 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
710 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
711 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
712 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
714 fjx0 = _fjsp_setzero_v2r8();
715 fjy0 = _fjsp_setzero_v2r8();
716 fjz0 = _fjsp_setzero_v2r8();
717 fjx1 = _fjsp_setzero_v2r8();
718 fjy1 = _fjsp_setzero_v2r8();
719 fjz1 = _fjsp_setzero_v2r8();
720 fjx2 = _fjsp_setzero_v2r8();
721 fjy2 = _fjsp_setzero_v2r8();
722 fjz2 = _fjsp_setzero_v2r8();
723 fjx3 = _fjsp_setzero_v2r8();
724 fjy3 = _fjsp_setzero_v2r8();
725 fjz3 = _fjsp_setzero_v2r8();
727 /**************************
728 * CALCULATE INTERACTIONS *
729 **************************/
731 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
734 /* LENNARD-JONES DISPERSION/REPULSION */
736 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
737 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
738 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
739 vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
740 _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
741 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
743 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
745 /* Update potential sum for this i atom from the interaction with this j atom. */
746 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
747 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
748 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
752 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
754 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
756 /* Update vectorial force */
757 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
758 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
759 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
761 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
762 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
763 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
767 /**************************
768 * CALCULATE INTERACTIONS *
769 **************************/
771 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
774 /* REACTION-FIELD ELECTROSTATICS */
775 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
776 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
778 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
780 /* Update potential sum for this i atom from the interaction with this j atom. */
781 velec = _fjsp_and_v2r8(velec,cutoff_mask);
782 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
783 velecsum = _fjsp_add_v2r8(velecsum,velec);
787 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
789 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
791 /* Update vectorial force */
792 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
793 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
794 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
796 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
797 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
798 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
802 /**************************
803 * CALCULATE INTERACTIONS *
804 **************************/
806 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
809 /* REACTION-FIELD ELECTROSTATICS */
810 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
811 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
813 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
815 /* Update potential sum for this i atom from the interaction with this j atom. */
816 velec = _fjsp_and_v2r8(velec,cutoff_mask);
817 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
818 velecsum = _fjsp_add_v2r8(velecsum,velec);
822 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
824 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
826 /* Update vectorial force */
827 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
828 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
829 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
831 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
832 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
833 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
837 /**************************
838 * CALCULATE INTERACTIONS *
839 **************************/
841 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
844 /* REACTION-FIELD ELECTROSTATICS */
845 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
846 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
848 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
850 /* Update potential sum for this i atom from the interaction with this j atom. */
851 velec = _fjsp_and_v2r8(velec,cutoff_mask);
852 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
853 velecsum = _fjsp_add_v2r8(velecsum,velec);
857 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
859 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
861 /* Update vectorial force */
862 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
863 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
864 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
866 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
867 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
868 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
872 /**************************
873 * CALCULATE INTERACTIONS *
874 **************************/
876 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
879 /* REACTION-FIELD ELECTROSTATICS */
880 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
881 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
883 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
885 /* Update potential sum for this i atom from the interaction with this j atom. */
886 velec = _fjsp_and_v2r8(velec,cutoff_mask);
887 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
888 velecsum = _fjsp_add_v2r8(velecsum,velec);
892 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
894 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
896 /* Update vectorial force */
897 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
898 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
899 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
901 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
902 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
903 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
907 /**************************
908 * CALCULATE INTERACTIONS *
909 **************************/
911 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
914 /* REACTION-FIELD ELECTROSTATICS */
915 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
916 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
918 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
920 /* Update potential sum for this i atom from the interaction with this j atom. */
921 velec = _fjsp_and_v2r8(velec,cutoff_mask);
922 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
923 velecsum = _fjsp_add_v2r8(velecsum,velec);
927 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
929 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
931 /* Update vectorial force */
932 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
933 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
934 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
936 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
937 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
938 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
942 /**************************
943 * CALCULATE INTERACTIONS *
944 **************************/
946 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
949 /* REACTION-FIELD ELECTROSTATICS */
950 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
951 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
953 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
955 /* Update potential sum for this i atom from the interaction with this j atom. */
956 velec = _fjsp_and_v2r8(velec,cutoff_mask);
957 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
958 velecsum = _fjsp_add_v2r8(velecsum,velec);
962 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
964 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
966 /* Update vectorial force */
967 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
968 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
969 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
971 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
972 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
973 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
977 /**************************
978 * CALCULATE INTERACTIONS *
979 **************************/
981 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
984 /* REACTION-FIELD ELECTROSTATICS */
985 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
986 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
988 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
990 /* Update potential sum for this i atom from the interaction with this j atom. */
991 velec = _fjsp_and_v2r8(velec,cutoff_mask);
992 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
993 velecsum = _fjsp_add_v2r8(velecsum,velec);
997 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
999 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1001 /* Update vectorial force */
1002 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1003 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1004 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1006 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1007 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1008 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1012 /**************************
1013 * CALCULATE INTERACTIONS *
1014 **************************/
1016 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1019 /* REACTION-FIELD ELECTROSTATICS */
1020 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
1021 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1023 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1025 /* Update potential sum for this i atom from the interaction with this j atom. */
1026 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1027 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1028 velecsum = _fjsp_add_v2r8(velecsum,velec);
1032 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1034 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1036 /* Update vectorial force */
1037 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1038 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1039 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1041 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1042 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1043 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1047 /**************************
1048 * CALCULATE INTERACTIONS *
1049 **************************/
1051 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1054 /* REACTION-FIELD ELECTROSTATICS */
1055 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
1056 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1058 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1060 /* Update potential sum for this i atom from the interaction with this j atom. */
1061 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1062 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1063 velecsum = _fjsp_add_v2r8(velecsum,velec);
1067 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1069 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1071 /* Update vectorial force */
1072 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1073 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1074 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1076 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1077 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1078 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1082 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1084 /* Inner loop uses 398 flops */
1087 /* End of innermost loop */
1089 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1090 f+i_coord_offset,fshift+i_shift_offset);
1093 /* Update potential energies */
1094 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1095 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1097 /* Increment number of inner iterations */
1098 inneriter += j_index_end - j_index_start;
1100 /* Outer loop uses 26 flops */
1103 /* Increment number of outer iterations */
1106 /* Update outer/inner flops */
1108 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*398);
1111 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
1112 * Electrostatics interaction: ReactionField
1113 * VdW interaction: LennardJones
1114 * Geometry: Water4-Water4
1115 * Calculate force/pot: Force
/*
 * Force-only water4-water4 kernel.
 * Site pair 0-0 gets Lennard-Jones only; the nine site pairs 1..3 x 1..3 get
 * reaction-field electrostatics. Every pair is masked against one shared cutoff.
 * No potential-energy sums are accumulated here, unlike the VF variant earlier
 * in this file, which calls gmx_fjsp_update_1pot_v2r8().
 *
 * Inputs as they are used below:
 *   nlist   - neighbor list: jindex, shift and iinr are read.
 *   fr      - force record: shift_vec, fshift, epsfac, ic->k_rf, ic->c_rf,
 *             ic->sh_invrc6, ntype, nbfp, rcoulomb and rvdw are read.
 *   mdatoms - per-atom data: chargeA and typeA are read.
 *   nrnb    - flop counters, updated once through inc_nrnb() at the end.
 * NOTE(review): xx, ff and kernel_data are not referenced on the lines below;
 * presumably xx/ff back the x and f arrays - TODO confirm.
 */
1118 nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
1119 (t_nblist * gmx_restrict nlist,
1120 rvec * gmx_restrict xx,
1121 rvec * gmx_restrict ff,
1122 t_forcerec * gmx_restrict fr,
1123 t_mdatoms * gmx_restrict mdatoms,
1124 nb_kernel_data_t * gmx_restrict kernel_data,
1125 t_nrnb * gmx_restrict nrnb)
1127 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1128 * just 0 for non-waters.
1129 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1130 * jnr indices corresponding to data put in the two positions in the SIMD register.
1132 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1133 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1135 int j_coord_offsetA,j_coord_offsetB;
1136 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1137 real rcutoff_scalar;
1138 real *shiftvec,*fshift,*x,*f;
1139 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1141 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1143 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1145 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1147 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1148 int vdwjidx0A,vdwjidx0B;
1149 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1150 int vdwjidx1A,vdwjidx1B;
1151 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1152 int vdwjidx2A,vdwjidx2B;
1153 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1154 int vdwjidx3A,vdwjidx3B;
1155 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1156 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1157 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1158 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1159 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1160 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1161 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1162 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1163 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1164 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1165 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1166 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1169 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1172 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1173 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1174 _fjsp_v2r8 itab_tmp;
1175 _fjsp_v2r8 dummy_mask,cutoff_mask;
1176 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1177 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1178 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1185 jindex = nlist->jindex;
1187 shiftidx = nlist->shift;
1189 shiftvec = fr->shift_vec[0];
1190 fshift = fr->fshift[0];
1191 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1192 charge = mdatoms->chargeA;
1193 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1194 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1195 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1196 nvdwtype = fr->ntype;
1197 vdwparam = fr->nbfp;
1198 vdwtype = mdatoms->typeA;
1200 /* Setup water-specific parameters */
/* Site charges and the LJ type are read once, from the first i entry of the list, and reused below for the j sites too. */
1201 inr = nlist->iinr[0];
/* i-side charges are pre-multiplied by facel (fr->epsfac); the j-side charges further down are left unscaled. */
1202 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1203 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1204 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1205 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1207 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1208 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1209 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
/* c6 and c12 for the 0-0 site pair sit in adjacent vdwparam slots (index and index+1). */
1210 vdwjidx0A = 2*vdwtype[inr+0];
1211 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1212 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
/* Charge products for the nine charged site pairs (i site 1..3 with j site 1..3). */
1213 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1214 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1215 qq13 = _fjsp_mul_v2r8(iq1,jq3);
1216 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1217 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1218 qq23 = _fjsp_mul_v2r8(iq2,jq3);
1219 qq31 = _fjsp_mul_v2r8(iq3,jq1);
1220 qq32 = _fjsp_mul_v2r8(iq3,jq2);
1221 qq33 = _fjsp_mul_v2r8(iq3,jq3);
1223 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1224 rcutoff_scalar = fr->rcoulomb;
1225 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
/* The squared cutoff is what every rsq value is compared against below. */
1226 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
/* NOTE(review): sh_vdw_invrcut6 and rvdw are assigned here but not referenced again on the lines of this force-only kernel. */
1228 sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
1229 rvdw = gmx_fjsp_set1_v2r8(fr->rvdw);
1231 /* Avoid stupid compiler warnings */
1233 j_coord_offsetA = 0;
1234 j_coord_offsetB = 0;
1239 /* Start outer loop over neighborlists */
1240 for(iidx=0; iidx<nri; iidx++)
1242 /* Load shift vector for this list */
1243 i_shift_offset = DIM*shiftidx[iidx];
1245 /* Load limits for loop over neighbors */
1246 j_index_start = jindex[iidx];
1247 j_index_end = jindex[iidx+1];
1249 /* Get outer coordinate index */
1251 i_coord_offset = DIM*inr;
1253 /* Load i particle coords and add shift vector */
1254 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1255 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
/* Reset the i-site force accumulators for this neighbor list. */
1257 fix0 = _fjsp_setzero_v2r8();
1258 fiy0 = _fjsp_setzero_v2r8();
1259 fiz0 = _fjsp_setzero_v2r8();
1260 fix1 = _fjsp_setzero_v2r8();
1261 fiy1 = _fjsp_setzero_v2r8();
1262 fiz1 = _fjsp_setzero_v2r8();
1263 fix2 = _fjsp_setzero_v2r8();
1264 fiy2 = _fjsp_setzero_v2r8();
1265 fiz2 = _fjsp_setzero_v2r8();
1266 fix3 = _fjsp_setzero_v2r8();
1267 fiy3 = _fjsp_setzero_v2r8();
1268 fiz3 = _fjsp_setzero_v2r8();
1270 /* Start inner kernel loop */
/* Two j entries (A and B) are handled per iteration; the bound j_index_end-1 leaves a trailing odd entry for the block after the loop. */
1271 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1274 /* Get j neighbor index, and coordinate index */
1276 jnrB = jjnr[jidx+1];
1277 j_coord_offsetA = DIM*jnrA;
1278 j_coord_offsetB = DIM*jnrB;
1280 /* load j atom coordinates */
1281 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1282 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1283 &jy2,&jz2,&jx3,&jy3,&jz3);
1285 /* Calculate displacement vector */
1286 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1287 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1288 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1289 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1290 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1291 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1292 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1293 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1294 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1295 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1296 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1297 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1298 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1299 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1300 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1301 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1302 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1303 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1304 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1305 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1306 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1307 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1308 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1309 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1310 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1311 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1312 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1313 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1314 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1315 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1317 /* Calculate squared distance and things based on it */
1318 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1319 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1320 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1321 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1322 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1323 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1324 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1325 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1326 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1327 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
/* rinv is computed for the charged pairs only; the 0-0 pair gets rinvsq00 straight from rsq00 (presumably 1/rsq00 - helper not defined in this file). */
1329 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1330 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1331 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1332 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1333 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1334 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1335 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1336 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1337 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1339 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
1340 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1341 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1342 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1343 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1344 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1345 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1346 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1347 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1348 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
/* The j-site force accumulators start from zero for every pair of j entries. */
1350 fjx0 = _fjsp_setzero_v2r8();
1351 fjy0 = _fjsp_setzero_v2r8();
1352 fjz0 = _fjsp_setzero_v2r8();
1353 fjx1 = _fjsp_setzero_v2r8();
1354 fjy1 = _fjsp_setzero_v2r8();
1355 fjz1 = _fjsp_setzero_v2r8();
1356 fjx2 = _fjsp_setzero_v2r8();
1357 fjy2 = _fjsp_setzero_v2r8();
1358 fjz2 = _fjsp_setzero_v2r8();
1359 fjx3 = _fjsp_setzero_v2r8();
1360 fjy3 = _fjsp_setzero_v2r8();
1361 fjz3 = _fjsp_setzero_v2r8();
1363 /**************************
1364 * CALCULATE INTERACTIONS *
1365 **************************/
/* Site pair 0-0: Lennard-Jones only. The guard presumably passes when at least one lane has rsq00 < rcutoff2. */
1367 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1370 /* LENNARD-JONES DISPERSION/REPULSION */
/* rinvsix = rinvsq00^3; fvdw = (c12_00*rinvsix - c6_00)*rinvsix*rinvsq00, assuming _fjsp_msub_v2r8(a,b,c) is a*b-c - TODO confirm. */
1372 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1373 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1375 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
/* AND with the compare result, presumably zeroing fscal in lanes at or beyond the cutoff. */
1379 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1381 /* Update vectorial force */
/* The same d*fscal term goes into both the i-site and the j-site accumulator. */
1382 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1383 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1384 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1386 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1387 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1388 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1392 /**************************
1393 * CALCULATE INTERACTIONS *
1394 **************************/
/* Site pair 1-1, first of nine reaction-field pairs that all follow the same pattern. */
1396 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1399 /* REACTION-FIELD ELECTROSTATICS */
/* felec = qq11*(rinv11*rinvsq11 - krf2) with krf2 = 2*k_rf, assuming _fjsp_msub_v2r8(a,b,c) is a*b-c - TODO confirm. */
1400 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1402 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1406 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1408 /* Update vectorial force */
1409 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1410 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1411 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1413 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1414 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1415 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1419 /**************************
1420 * CALCULATE INTERACTIONS *
1421 **************************/
1423 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1426 /* REACTION-FIELD ELECTROSTATICS */
1427 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1429 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1433 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1435 /* Update vectorial force */
1436 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1437 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1438 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1440 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1441 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1442 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1446 /**************************
1447 * CALCULATE INTERACTIONS *
1448 **************************/
1450 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1453 /* REACTION-FIELD ELECTROSTATICS */
1454 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1456 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1460 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1462 /* Update vectorial force */
1463 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1464 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1465 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1467 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1468 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1469 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1473 /**************************
1474 * CALCULATE INTERACTIONS *
1475 **************************/
1477 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1480 /* REACTION-FIELD ELECTROSTATICS */
1481 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1483 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1487 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1489 /* Update vectorial force */
1490 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1491 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1492 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1494 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1495 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1496 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1500 /**************************
1501 * CALCULATE INTERACTIONS *
1502 **************************/
1504 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1507 /* REACTION-FIELD ELECTROSTATICS */
1508 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1510 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1514 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1516 /* Update vectorial force */
1517 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1518 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1519 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1521 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1522 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1523 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1527 /**************************
1528 * CALCULATE INTERACTIONS *
1529 **************************/
1531 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1534 /* REACTION-FIELD ELECTROSTATICS */
1535 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1537 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1541 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1543 /* Update vectorial force */
1544 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1545 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1546 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1548 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1549 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1550 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1554 /**************************
1555 * CALCULATE INTERACTIONS *
1556 **************************/
1558 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1561 /* REACTION-FIELD ELECTROSTATICS */
1562 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1564 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1568 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1570 /* Update vectorial force */
1571 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1572 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1573 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1575 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1576 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1577 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1581 /**************************
1582 * CALCULATE INTERACTIONS *
1583 **************************/
1585 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1588 /* REACTION-FIELD ELECTROSTATICS */
1589 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1591 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1595 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1597 /* Update vectorial force */
1598 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1599 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1600 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1602 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1603 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1604 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1608 /**************************
1609 * CALCULATE INTERACTIONS *
1610 **************************/
1612 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1615 /* REACTION-FIELD ELECTROSTATICS */
1616 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1618 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1622 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1624 /* Update vectorial force */
1625 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1626 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1627 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1629 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1630 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1631 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
/* Pass the accumulated j-site forces for entries A and B to the "decrement" helper (presumably f[j] -= fj; helper not defined in this file - TODO confirm). */
1635 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1637 /* Inner loop uses 333 flops */
/* Remainder: one unpaired j entry is left when the j count is odd; only the A offset is used from here on. */
1640 if(jidx<j_index_end)
1644 j_coord_offsetA = DIM*jnrA;
1646 /* load j atom coordinates */
1647 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1648 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1649 &jy2,&jz2,&jx3,&jy3,&jz3);
1651 /* Calculate displacement vector */
1652 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1653 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1654 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1655 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1656 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1657 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1658 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1659 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1660 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1661 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1662 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1663 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1664 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1665 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1666 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1667 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1668 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1669 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1670 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1671 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1672 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1673 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1674 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1675 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1676 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1677 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1678 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1679 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1680 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1681 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1683 /* Calculate squared distance and things based on it */
1684 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1685 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1686 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1687 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1688 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1689 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1690 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1691 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1692 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1693 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1695 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1696 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1697 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1698 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1699 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1700 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1701 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1702 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1703 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1705 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
1706 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1707 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1708 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1709 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1710 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1711 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1712 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1713 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1714 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1716 fjx0 = _fjsp_setzero_v2r8();
1717 fjy0 = _fjsp_setzero_v2r8();
1718 fjz0 = _fjsp_setzero_v2r8();
1719 fjx1 = _fjsp_setzero_v2r8();
1720 fjy1 = _fjsp_setzero_v2r8();
1721 fjz1 = _fjsp_setzero_v2r8();
1722 fjx2 = _fjsp_setzero_v2r8();
1723 fjy2 = _fjsp_setzero_v2r8();
1724 fjz2 = _fjsp_setzero_v2r8();
1725 fjx3 = _fjsp_setzero_v2r8();
1726 fjy3 = _fjsp_setzero_v2r8();
1727 fjz3 = _fjsp_setzero_v2r8();
1729 /**************************
1730 * CALCULATE INTERACTIONS *
1731 **************************/
1733 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1736 /* LENNARD-JONES DISPERSION/REPULSION */
1738 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1739 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1741 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1745 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
/* Extra step compared with the paired loop: fscal is combined with a zero vector, presumably keeping lane 0 and clearing the unused lane 1 - TODO confirm unpacklo semantics. */
1747 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1749 /* Update vectorial force */
1750 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1751 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1752 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1754 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1755 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1756 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1760 /**************************
1761 * CALCULATE INTERACTIONS *
1762 **************************/
1764 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1767 /* REACTION-FIELD ELECTROSTATICS */
1768 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1770 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1774 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1776 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1778 /* Update vectorial force */
1779 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1780 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1781 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1783 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1784 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1785 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1789 /**************************
1790 * CALCULATE INTERACTIONS *
1791 **************************/
1793 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1796 /* REACTION-FIELD ELECTROSTATICS */
1797 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1799 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1803 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1805 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1807 /* Update vectorial force */
1808 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1809 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1810 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1812 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1813 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1814 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1818 /**************************
1819 * CALCULATE INTERACTIONS *
1820 **************************/
1822 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1825 /* REACTION-FIELD ELECTROSTATICS */
1826 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1828 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1832 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1834 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1836 /* Update vectorial force */
1837 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1838 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1839 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1841 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1842 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1843 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1847 /**************************
1848 * CALCULATE INTERACTIONS *
1849 **************************/
1851 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1854 /* REACTION-FIELD ELECTROSTATICS */
1855 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1857 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1861 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1863 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1865 /* Update vectorial force */
1866 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1867 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1868 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1870 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1871 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1872 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1876 /**************************
1877 * CALCULATE INTERACTIONS *
1878 **************************/
1880 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1883 /* REACTION-FIELD ELECTROSTATICS */
1884 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1886 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1890 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1892 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1894 /* Update vectorial force */
1895 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1896 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1897 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1899 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1900 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1901 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1905 /**************************
1906 * CALCULATE INTERACTIONS *
1907 **************************/
1909 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1912 /* REACTION-FIELD ELECTROSTATICS */
1913 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1915 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1919 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1921 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1923 /* Update vectorial force */
1924 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1925 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1926 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1928 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1929 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1930 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1934 /**************************
1935 * CALCULATE INTERACTIONS *
1936 **************************/
1938 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1941 /* REACTION-FIELD ELECTROSTATICS */
1942 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1944 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1948 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1950 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1952 /* Update vectorial force */
1953 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1954 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1955 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1957 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1958 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1959 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1963 /**************************
1964 * CALCULATE INTERACTIONS *
1965 **************************/
1967 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1970 /* REACTION-FIELD ELECTROSTATICS */
1971 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1973 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1977 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1979 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1981 /* Update vectorial force */
1982 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1983 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1984 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1986 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1987 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1988 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1992 /**************************
1993 * CALCULATE INTERACTIONS *
1994 **************************/
1996 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1999 /* REACTION-FIELD ELECTROSTATICS */
2000 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
2002 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
2006 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2008 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2010 /* Update vectorial force */
2011 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
2012 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2013 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2015 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2016 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2017 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
/* Single-pointer variant of the j-force write-back: only entry A exists in the remainder. */
2021 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2023 /* Inner loop uses 333 flops */
2026 /* End of innermost loop */
/* Pass the twelve i-site accumulators to the helper together with the i-atom force and shift-force locations (presumably it reduces the lanes and adds into both - TODO confirm). */
2028 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2029 f+i_coord_offset,fshift+i_shift_offset);
2031 /* Increment number of inner iterations */
/* Counted per j entry (not per SIMD iteration), so an odd remainder entry is included. */
2032 inneriter += j_index_end - j_index_start;
2034 /* Outer loop uses 24 flops */
2037 /* Increment number of outer iterations */
2040 /* Update outer/inner flops */
/* Flop accounting: 24 per outer iteration plus 333 per inner iteration, filed under eNR_NBKERNEL_ELEC_VDW_W4W4_F. */
2042 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*333);