2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: LennardJones
53 * Geometry: Water4-Water4
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
86 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 int vdwjidx3A,vdwjidx3B;
94 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
95 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
96 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
97 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
98 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
99 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
100 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
101 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
102 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
103 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
104 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
105 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
108 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
112 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
113 _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
114 real rswitch_scalar,d_scalar;
116 _fjsp_v2r8 dummy_mask,cutoff_mask;
117 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
118 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
119 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
126 jindex = nlist->jindex;
128 shiftidx = nlist->shift;
130 shiftvec = fr->shift_vec[0];
131 fshift = fr->fshift[0];
132 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
133 charge = mdatoms->chargeA;
134 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
135 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
136 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
137 nvdwtype = fr->ntype;
139 vdwtype = mdatoms->typeA;
141 /* Setup water-specific parameters */
142 inr = nlist->iinr[0];
143 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
144 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
145 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
146 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
148 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
149 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
150 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
151 vdwjidx0A = 2*vdwtype[inr+0];
152 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
153 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
154 qq11 = _fjsp_mul_v2r8(iq1,jq1);
155 qq12 = _fjsp_mul_v2r8(iq1,jq2);
156 qq13 = _fjsp_mul_v2r8(iq1,jq3);
157 qq21 = _fjsp_mul_v2r8(iq2,jq1);
158 qq22 = _fjsp_mul_v2r8(iq2,jq2);
159 qq23 = _fjsp_mul_v2r8(iq2,jq3);
160 qq31 = _fjsp_mul_v2r8(iq3,jq1);
161 qq32 = _fjsp_mul_v2r8(iq3,jq2);
162 qq33 = _fjsp_mul_v2r8(iq3,jq3);
164 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
165 rcutoff_scalar = fr->rcoulomb;
166 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
167 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
169 rswitch_scalar = fr->rvdw_switch;
170 rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
171 /* Setup switch parameters */
172 d_scalar = rcutoff_scalar-rswitch_scalar;
173 d = gmx_fjsp_set1_v2r8(d_scalar);
174 swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
175 swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
176 swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
177 swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
178 swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
179 swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
181 /* Avoid stupid compiler warnings */
189 /* Start outer loop over neighborlists */
190 for(iidx=0; iidx<nri; iidx++)
192 /* Load shift vector for this list */
193 i_shift_offset = DIM*shiftidx[iidx];
195 /* Load limits for loop over neighbors */
196 j_index_start = jindex[iidx];
197 j_index_end = jindex[iidx+1];
199 /* Get outer coordinate index */
201 i_coord_offset = DIM*inr;
203 /* Load i particle coords and add shift vector */
204 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
205 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
207 fix0 = _fjsp_setzero_v2r8();
208 fiy0 = _fjsp_setzero_v2r8();
209 fiz0 = _fjsp_setzero_v2r8();
210 fix1 = _fjsp_setzero_v2r8();
211 fiy1 = _fjsp_setzero_v2r8();
212 fiz1 = _fjsp_setzero_v2r8();
213 fix2 = _fjsp_setzero_v2r8();
214 fiy2 = _fjsp_setzero_v2r8();
215 fiz2 = _fjsp_setzero_v2r8();
216 fix3 = _fjsp_setzero_v2r8();
217 fiy3 = _fjsp_setzero_v2r8();
218 fiz3 = _fjsp_setzero_v2r8();
220 /* Reset potential sums */
221 velecsum = _fjsp_setzero_v2r8();
222 vvdwsum = _fjsp_setzero_v2r8();
224 /* Start inner kernel loop */
225 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
228 /* Get j neighbor index, and coordinate index */
231 j_coord_offsetA = DIM*jnrA;
232 j_coord_offsetB = DIM*jnrB;
234 /* load j atom coordinates */
235 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
236 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
237 &jy2,&jz2,&jx3,&jy3,&jz3);
239 /* Calculate displacement vector */
240 dx00 = _fjsp_sub_v2r8(ix0,jx0);
241 dy00 = _fjsp_sub_v2r8(iy0,jy0);
242 dz00 = _fjsp_sub_v2r8(iz0,jz0);
243 dx11 = _fjsp_sub_v2r8(ix1,jx1);
244 dy11 = _fjsp_sub_v2r8(iy1,jy1);
245 dz11 = _fjsp_sub_v2r8(iz1,jz1);
246 dx12 = _fjsp_sub_v2r8(ix1,jx2);
247 dy12 = _fjsp_sub_v2r8(iy1,jy2);
248 dz12 = _fjsp_sub_v2r8(iz1,jz2);
249 dx13 = _fjsp_sub_v2r8(ix1,jx3);
250 dy13 = _fjsp_sub_v2r8(iy1,jy3);
251 dz13 = _fjsp_sub_v2r8(iz1,jz3);
252 dx21 = _fjsp_sub_v2r8(ix2,jx1);
253 dy21 = _fjsp_sub_v2r8(iy2,jy1);
254 dz21 = _fjsp_sub_v2r8(iz2,jz1);
255 dx22 = _fjsp_sub_v2r8(ix2,jx2);
256 dy22 = _fjsp_sub_v2r8(iy2,jy2);
257 dz22 = _fjsp_sub_v2r8(iz2,jz2);
258 dx23 = _fjsp_sub_v2r8(ix2,jx3);
259 dy23 = _fjsp_sub_v2r8(iy2,jy3);
260 dz23 = _fjsp_sub_v2r8(iz2,jz3);
261 dx31 = _fjsp_sub_v2r8(ix3,jx1);
262 dy31 = _fjsp_sub_v2r8(iy3,jy1);
263 dz31 = _fjsp_sub_v2r8(iz3,jz1);
264 dx32 = _fjsp_sub_v2r8(ix3,jx2);
265 dy32 = _fjsp_sub_v2r8(iy3,jy2);
266 dz32 = _fjsp_sub_v2r8(iz3,jz2);
267 dx33 = _fjsp_sub_v2r8(ix3,jx3);
268 dy33 = _fjsp_sub_v2r8(iy3,jy3);
269 dz33 = _fjsp_sub_v2r8(iz3,jz3);
271 /* Calculate squared distance and things based on it */
272 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
273 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
274 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
275 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
276 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
277 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
278 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
279 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
280 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
281 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
283 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
284 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
285 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
286 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
287 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
288 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
289 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
290 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
291 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
292 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
294 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
295 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
296 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
297 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
298 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
299 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
300 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
301 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
302 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
303 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
305 fjx0 = _fjsp_setzero_v2r8();
306 fjy0 = _fjsp_setzero_v2r8();
307 fjz0 = _fjsp_setzero_v2r8();
308 fjx1 = _fjsp_setzero_v2r8();
309 fjy1 = _fjsp_setzero_v2r8();
310 fjz1 = _fjsp_setzero_v2r8();
311 fjx2 = _fjsp_setzero_v2r8();
312 fjy2 = _fjsp_setzero_v2r8();
313 fjz2 = _fjsp_setzero_v2r8();
314 fjx3 = _fjsp_setzero_v2r8();
315 fjy3 = _fjsp_setzero_v2r8();
316 fjz3 = _fjsp_setzero_v2r8();
318 /**************************
319 * CALCULATE INTERACTIONS *
320 **************************/
322 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
325 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
327 /* LENNARD-JONES DISPERSION/REPULSION */
329 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
330 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
331 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
332 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
333 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
335 d = _fjsp_sub_v2r8(r00,rswitch);
336 d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
337 d2 = _fjsp_mul_v2r8(d,d);
338 sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
340 dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
342 /* Evaluate switch function */
343 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
344 fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
345 vvdw = _fjsp_mul_v2r8(vvdw,sw);
346 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
348 /* Update potential sum for this i atom from the interaction with this j atom. */
349 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
350 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
354 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
356 /* Update vectorial force */
357 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
358 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
359 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
361 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
362 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
363 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
367 /**************************
368 * CALCULATE INTERACTIONS *
369 **************************/
371 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
374 /* REACTION-FIELD ELECTROSTATICS */
375 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
376 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
378 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
380 /* Update potential sum for this i atom from the interaction with this j atom. */
381 velec = _fjsp_and_v2r8(velec,cutoff_mask);
382 velecsum = _fjsp_add_v2r8(velecsum,velec);
386 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
388 /* Update vectorial force */
389 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
390 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
391 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
393 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
394 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
395 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
399 /**************************
400 * CALCULATE INTERACTIONS *
401 **************************/
403 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
406 /* REACTION-FIELD ELECTROSTATICS */
407 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
408 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
410 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
412 /* Update potential sum for this i atom from the interaction with this j atom. */
413 velec = _fjsp_and_v2r8(velec,cutoff_mask);
414 velecsum = _fjsp_add_v2r8(velecsum,velec);
418 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
420 /* Update vectorial force */
421 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
422 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
423 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
425 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
426 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
427 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
431 /**************************
432 * CALCULATE INTERACTIONS *
433 **************************/
435 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
438 /* REACTION-FIELD ELECTROSTATICS */
439 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
440 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
442 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
444 /* Update potential sum for this i atom from the interaction with this j atom. */
445 velec = _fjsp_and_v2r8(velec,cutoff_mask);
446 velecsum = _fjsp_add_v2r8(velecsum,velec);
450 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
452 /* Update vectorial force */
453 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
454 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
455 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
457 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
458 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
459 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
463 /**************************
464 * CALCULATE INTERACTIONS *
465 **************************/
467 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
470 /* REACTION-FIELD ELECTROSTATICS */
471 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
472 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
474 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
476 /* Update potential sum for this i atom from the interaction with this j atom. */
477 velec = _fjsp_and_v2r8(velec,cutoff_mask);
478 velecsum = _fjsp_add_v2r8(velecsum,velec);
482 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
484 /* Update vectorial force */
485 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
486 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
487 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
489 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
490 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
491 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
495 /**************************
496 * CALCULATE INTERACTIONS *
497 **************************/
499 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
502 /* REACTION-FIELD ELECTROSTATICS */
503 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
504 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
506 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
508 /* Update potential sum for this i atom from the interaction with this j atom. */
509 velec = _fjsp_and_v2r8(velec,cutoff_mask);
510 velecsum = _fjsp_add_v2r8(velecsum,velec);
514 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
516 /* Update vectorial force */
517 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
518 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
519 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
521 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
522 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
523 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
527 /**************************
528 * CALCULATE INTERACTIONS *
529 **************************/
531 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
534 /* REACTION-FIELD ELECTROSTATICS */
535 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
536 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
538 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
540 /* Update potential sum for this i atom from the interaction with this j atom. */
541 velec = _fjsp_and_v2r8(velec,cutoff_mask);
542 velecsum = _fjsp_add_v2r8(velecsum,velec);
546 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
548 /* Update vectorial force */
549 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
550 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
551 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
553 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
554 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
555 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
559 /**************************
560 * CALCULATE INTERACTIONS *
561 **************************/
563 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
566 /* REACTION-FIELD ELECTROSTATICS */
567 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
568 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
570 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
572 /* Update potential sum for this i atom from the interaction with this j atom. */
573 velec = _fjsp_and_v2r8(velec,cutoff_mask);
574 velecsum = _fjsp_add_v2r8(velecsum,velec);
578 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
580 /* Update vectorial force */
581 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
582 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
583 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
585 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
586 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
587 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
591 /**************************
592 * CALCULATE INTERACTIONS *
593 **************************/
595 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
598 /* REACTION-FIELD ELECTROSTATICS */
599 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
600 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
602 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
604 /* Update potential sum for this i atom from the interaction with this j atom. */
605 velec = _fjsp_and_v2r8(velec,cutoff_mask);
606 velecsum = _fjsp_add_v2r8(velecsum,velec);
610 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
612 /* Update vectorial force */
613 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
614 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
615 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
617 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
618 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
619 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
623 /**************************
624 * CALCULATE INTERACTIONS *
625 **************************/
627 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
630 /* REACTION-FIELD ELECTROSTATICS */
631 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
632 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
634 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
636 /* Update potential sum for this i atom from the interaction with this j atom. */
637 velec = _fjsp_and_v2r8(velec,cutoff_mask);
638 velecsum = _fjsp_add_v2r8(velecsum,velec);
642 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
644 /* Update vectorial force */
645 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
646 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
647 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
649 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
650 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
651 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
655 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
657 /* Inner loop uses 416 flops */
664 j_coord_offsetA = DIM*jnrA;
666 /* load j atom coordinates */
667 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
668 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
669 &jy2,&jz2,&jx3,&jy3,&jz3);
671 /* Calculate displacement vector */
672 dx00 = _fjsp_sub_v2r8(ix0,jx0);
673 dy00 = _fjsp_sub_v2r8(iy0,jy0);
674 dz00 = _fjsp_sub_v2r8(iz0,jz0);
675 dx11 = _fjsp_sub_v2r8(ix1,jx1);
676 dy11 = _fjsp_sub_v2r8(iy1,jy1);
677 dz11 = _fjsp_sub_v2r8(iz1,jz1);
678 dx12 = _fjsp_sub_v2r8(ix1,jx2);
679 dy12 = _fjsp_sub_v2r8(iy1,jy2);
680 dz12 = _fjsp_sub_v2r8(iz1,jz2);
681 dx13 = _fjsp_sub_v2r8(ix1,jx3);
682 dy13 = _fjsp_sub_v2r8(iy1,jy3);
683 dz13 = _fjsp_sub_v2r8(iz1,jz3);
684 dx21 = _fjsp_sub_v2r8(ix2,jx1);
685 dy21 = _fjsp_sub_v2r8(iy2,jy1);
686 dz21 = _fjsp_sub_v2r8(iz2,jz1);
687 dx22 = _fjsp_sub_v2r8(ix2,jx2);
688 dy22 = _fjsp_sub_v2r8(iy2,jy2);
689 dz22 = _fjsp_sub_v2r8(iz2,jz2);
690 dx23 = _fjsp_sub_v2r8(ix2,jx3);
691 dy23 = _fjsp_sub_v2r8(iy2,jy3);
692 dz23 = _fjsp_sub_v2r8(iz2,jz3);
693 dx31 = _fjsp_sub_v2r8(ix3,jx1);
694 dy31 = _fjsp_sub_v2r8(iy3,jy1);
695 dz31 = _fjsp_sub_v2r8(iz3,jz1);
696 dx32 = _fjsp_sub_v2r8(ix3,jx2);
697 dy32 = _fjsp_sub_v2r8(iy3,jy2);
698 dz32 = _fjsp_sub_v2r8(iz3,jz2);
699 dx33 = _fjsp_sub_v2r8(ix3,jx3);
700 dy33 = _fjsp_sub_v2r8(iy3,jy3);
701 dz33 = _fjsp_sub_v2r8(iz3,jz3);
703 /* Calculate squared distance and things based on it */
704 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
705 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
706 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
707 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
708 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
709 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
710 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
711 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
712 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
713 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
715 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
716 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
717 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
718 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
719 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
720 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
721 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
722 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
723 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
724 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
726 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
727 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
728 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
729 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
730 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
731 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
732 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
733 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
734 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
735 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
737 fjx0 = _fjsp_setzero_v2r8();
738 fjy0 = _fjsp_setzero_v2r8();
739 fjz0 = _fjsp_setzero_v2r8();
740 fjx1 = _fjsp_setzero_v2r8();
741 fjy1 = _fjsp_setzero_v2r8();
742 fjz1 = _fjsp_setzero_v2r8();
743 fjx2 = _fjsp_setzero_v2r8();
744 fjy2 = _fjsp_setzero_v2r8();
745 fjz2 = _fjsp_setzero_v2r8();
746 fjx3 = _fjsp_setzero_v2r8();
747 fjy3 = _fjsp_setzero_v2r8();
748 fjz3 = _fjsp_setzero_v2r8();
750 /**************************
751 * CALCULATE INTERACTIONS *
752 **************************/
754 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
757 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
759 /* LENNARD-JONES DISPERSION/REPULSION */
761 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
762 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
763 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
764 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
765 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
767 d = _fjsp_sub_v2r8(r00,rswitch);
768 d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
769 d2 = _fjsp_mul_v2r8(d,d);
770 sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
772 dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
774 /* Evaluate switch function */
775 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
776 fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
777 vvdw = _fjsp_mul_v2r8(vvdw,sw);
778 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
780 /* Update potential sum for this i atom from the interaction with this j atom. */
781 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
782 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
783 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
787 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
789 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
791 /* Update vectorial force */
792 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
793 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
794 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
796 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
797 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
798 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
802 /**************************
803 * CALCULATE INTERACTIONS *
804 **************************/
806 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
809 /* REACTION-FIELD ELECTROSTATICS */
810 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
811 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
813 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
815 /* Update potential sum for this i atom from the interaction with this j atom. */
816 velec = _fjsp_and_v2r8(velec,cutoff_mask);
817 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
818 velecsum = _fjsp_add_v2r8(velecsum,velec);
822 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
824 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
826 /* Update vectorial force */
827 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
828 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
829 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
831 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
832 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
833 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
837 /**************************
838 * CALCULATE INTERACTIONS *
839 **************************/
841 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
844 /* REACTION-FIELD ELECTROSTATICS */
845 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
846 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
848 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
850 /* Update potential sum for this i atom from the interaction with this j atom. */
851 velec = _fjsp_and_v2r8(velec,cutoff_mask);
852 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
853 velecsum = _fjsp_add_v2r8(velecsum,velec);
857 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
859 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
861 /* Update vectorial force */
862 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
863 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
864 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
866 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
867 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
868 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
872 /**************************
873 * CALCULATE INTERACTIONS *
874 **************************/
876 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
879 /* REACTION-FIELD ELECTROSTATICS */
880 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
881 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
883 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
885 /* Update potential sum for this i atom from the interaction with this j atom. */
886 velec = _fjsp_and_v2r8(velec,cutoff_mask);
887 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
888 velecsum = _fjsp_add_v2r8(velecsum,velec);
892 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
894 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
896 /* Update vectorial force */
897 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
898 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
899 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
901 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
902 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
903 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
907 /**************************
908 * CALCULATE INTERACTIONS *
909 **************************/
911 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
914 /* REACTION-FIELD ELECTROSTATICS */
915 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
916 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
918 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
920 /* Update potential sum for this i atom from the interaction with this j atom. */
921 velec = _fjsp_and_v2r8(velec,cutoff_mask);
922 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
923 velecsum = _fjsp_add_v2r8(velecsum,velec);
927 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
929 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
931 /* Update vectorial force */
932 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
933 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
934 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
936 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
937 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
938 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
942 /**************************
943 * CALCULATE INTERACTIONS *
944 **************************/
946 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
949 /* REACTION-FIELD ELECTROSTATICS */
950 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
951 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
953 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
955 /* Update potential sum for this i atom from the interaction with this j atom. */
956 velec = _fjsp_and_v2r8(velec,cutoff_mask);
957 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
958 velecsum = _fjsp_add_v2r8(velecsum,velec);
962 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
964 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
966 /* Update vectorial force */
967 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
968 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
969 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
971 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
972 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
973 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
977 /**************************
978 * CALCULATE INTERACTIONS *
979 **************************/
981 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
984 /* REACTION-FIELD ELECTROSTATICS */
985 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
986 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
988 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
990 /* Update potential sum for this i atom from the interaction with this j atom. */
991 velec = _fjsp_and_v2r8(velec,cutoff_mask);
992 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
993 velecsum = _fjsp_add_v2r8(velecsum,velec);
997 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
999 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1001 /* Update vectorial force */
1002 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1003 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1004 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1006 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1007 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1008 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1012 /**************************
1013 * CALCULATE INTERACTIONS *
1014 **************************/
1016 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1019 /* REACTION-FIELD ELECTROSTATICS */
1020 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
1021 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1023 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1025 /* Update potential sum for this i atom from the interaction with this j atom. */
1026 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1027 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1028 velecsum = _fjsp_add_v2r8(velecsum,velec);
1032 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1034 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1036 /* Update vectorial force */
1037 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1038 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1039 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1041 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1042 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1043 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1047 /**************************
1048 * CALCULATE INTERACTIONS *
1049 **************************/
1051 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1054 /* REACTION-FIELD ELECTROSTATICS */
1055 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
1056 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1058 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1060 /* Update potential sum for this i atom from the interaction with this j atom. */
1061 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1062 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1063 velecsum = _fjsp_add_v2r8(velecsum,velec);
1067 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1069 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1071 /* Update vectorial force */
1072 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1073 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1074 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1076 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1077 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1078 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1082 /**************************
1083 * CALCULATE INTERACTIONS *
1084 **************************/
1086 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1089 /* REACTION-FIELD ELECTROSTATICS */
1090 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
1091 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1093 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1095 /* Update potential sum for this i atom from the interaction with this j atom. */
1096 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1097 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1098 velecsum = _fjsp_add_v2r8(velecsum,velec);
1102 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1104 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1106 /* Update vectorial force */
1107 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1108 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1109 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1111 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1112 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1113 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1117 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1119 /* Inner loop uses 416 flops */
1122 /* End of innermost loop */
1124 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1125 f+i_coord_offset,fshift+i_shift_offset);
1128 /* Update potential energies */
1129 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1130 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1132 /* Increment number of inner iterations */
1133 inneriter += j_index_end - j_index_start;
1135 /* Outer loop uses 26 flops */
1138 /* Increment number of outer iterations */
1141 /* Update outer/inner flops */
1143 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*416);
1146 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
1147 * Electrostatics interaction: ReactionField
1148 * VdW interaction: LennardJones
1149 * Geometry: Water4-Water4
1150 * Calculate force/pot: Force
1153 nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
1154 (t_nblist * gmx_restrict nlist,
1155 rvec * gmx_restrict xx,
1156 rvec * gmx_restrict ff,
1157 t_forcerec * gmx_restrict fr,
1158 t_mdatoms * gmx_restrict mdatoms,
1159 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1160 t_nrnb * gmx_restrict nrnb)
1162 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1163 * just 0 for non-waters.
1164 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1165 * jnr indices corresponding to data put in the two positions in the SIMD register.
1167 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1168 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1170 int j_coord_offsetA,j_coord_offsetB;
1171 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1172 real rcutoff_scalar;
1173 real *shiftvec,*fshift,*x,*f;
1174 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1176 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1178 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1180 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1182 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1183 int vdwjidx0A,vdwjidx0B;
1184 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1185 int vdwjidx1A,vdwjidx1B;
1186 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1187 int vdwjidx2A,vdwjidx2B;
1188 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1189 int vdwjidx3A,vdwjidx3B;
1190 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1191 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1192 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1193 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1194 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1195 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1196 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1197 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1198 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1199 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1200 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1201 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1204 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1207 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1208 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1209 _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
1210 real rswitch_scalar,d_scalar;
1211 _fjsp_v2r8 itab_tmp;
1212 _fjsp_v2r8 dummy_mask,cutoff_mask;
1213 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1214 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1215 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1222 jindex = nlist->jindex;
1224 shiftidx = nlist->shift;
1226 shiftvec = fr->shift_vec[0];
1227 fshift = fr->fshift[0];
1228 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1229 charge = mdatoms->chargeA;
1230 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1231 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1232 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1233 nvdwtype = fr->ntype;
1234 vdwparam = fr->nbfp;
1235 vdwtype = mdatoms->typeA;
1237 /* Setup water-specific parameters */
1238 inr = nlist->iinr[0];
1239 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1240 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1241 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1242 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1244 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1245 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1246 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
1247 vdwjidx0A = 2*vdwtype[inr+0];
1248 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1249 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1250 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1251 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1252 qq13 = _fjsp_mul_v2r8(iq1,jq3);
1253 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1254 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1255 qq23 = _fjsp_mul_v2r8(iq2,jq3);
1256 qq31 = _fjsp_mul_v2r8(iq3,jq1);
1257 qq32 = _fjsp_mul_v2r8(iq3,jq2);
1258 qq33 = _fjsp_mul_v2r8(iq3,jq3);
1260 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1261 rcutoff_scalar = fr->rcoulomb;
1262 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1263 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1265 rswitch_scalar = fr->rvdw_switch;
1266 rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
1267 /* Setup switch parameters */
1268 d_scalar = rcutoff_scalar-rswitch_scalar;
1269 d = gmx_fjsp_set1_v2r8(d_scalar);
1270 swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
1271 swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1272 swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1273 swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
1274 swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1275 swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1277 /* Avoid stupid compiler warnings */
1279 j_coord_offsetA = 0;
1280 j_coord_offsetB = 0;
1285 /* Start outer loop over neighborlists */
1286 for(iidx=0; iidx<nri; iidx++)
1288 /* Load shift vector for this list */
1289 i_shift_offset = DIM*shiftidx[iidx];
1291 /* Load limits for loop over neighbors */
1292 j_index_start = jindex[iidx];
1293 j_index_end = jindex[iidx+1];
1295 /* Get outer coordinate index */
1297 i_coord_offset = DIM*inr;
1299 /* Load i particle coords and add shift vector */
1300 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1301 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1303 fix0 = _fjsp_setzero_v2r8();
1304 fiy0 = _fjsp_setzero_v2r8();
1305 fiz0 = _fjsp_setzero_v2r8();
1306 fix1 = _fjsp_setzero_v2r8();
1307 fiy1 = _fjsp_setzero_v2r8();
1308 fiz1 = _fjsp_setzero_v2r8();
1309 fix2 = _fjsp_setzero_v2r8();
1310 fiy2 = _fjsp_setzero_v2r8();
1311 fiz2 = _fjsp_setzero_v2r8();
1312 fix3 = _fjsp_setzero_v2r8();
1313 fiy3 = _fjsp_setzero_v2r8();
1314 fiz3 = _fjsp_setzero_v2r8();
1316 /* Start inner kernel loop */
1317 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1320 /* Get j neighbor index, and coordinate index */
1322 jnrB = jjnr[jidx+1];
1323 j_coord_offsetA = DIM*jnrA;
1324 j_coord_offsetB = DIM*jnrB;
1326 /* load j atom coordinates */
1327 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1328 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1329 &jy2,&jz2,&jx3,&jy3,&jz3);
1331 /* Calculate displacement vector */
1332 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1333 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1334 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1335 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1336 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1337 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1338 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1339 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1340 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1341 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1342 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1343 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1344 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1345 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1346 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1347 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1348 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1349 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1350 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1351 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1352 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1353 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1354 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1355 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1356 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1357 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1358 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1359 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1360 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1361 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1363 /* Calculate squared distance and things based on it */
1364 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1365 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1366 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1367 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1368 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1369 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1370 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1371 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1372 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1373 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1375 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1376 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1377 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1378 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1379 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1380 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1381 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1382 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1383 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1384 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1386 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1387 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1388 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1389 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1390 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1391 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1392 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1393 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1394 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1395 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1397 fjx0 = _fjsp_setzero_v2r8();
1398 fjy0 = _fjsp_setzero_v2r8();
1399 fjz0 = _fjsp_setzero_v2r8();
1400 fjx1 = _fjsp_setzero_v2r8();
1401 fjy1 = _fjsp_setzero_v2r8();
1402 fjz1 = _fjsp_setzero_v2r8();
1403 fjx2 = _fjsp_setzero_v2r8();
1404 fjy2 = _fjsp_setzero_v2r8();
1405 fjz2 = _fjsp_setzero_v2r8();
1406 fjx3 = _fjsp_setzero_v2r8();
1407 fjy3 = _fjsp_setzero_v2r8();
1408 fjz3 = _fjsp_setzero_v2r8();
1410 /**************************
1411 * CALCULATE INTERACTIONS *
1412 **************************/
1414 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1417 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1419 /* LENNARD-JONES DISPERSION/REPULSION */
1421 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1422 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
1423 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1424 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1425 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1427 d = _fjsp_sub_v2r8(r00,rswitch);
1428 d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1429 d2 = _fjsp_mul_v2r8(d,d);
1430 sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1432 dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1434 /* Evaluate switch function */
1435 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1436 fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1437 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1441 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1443 /* Update vectorial force */
1444 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1445 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1446 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1448 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1449 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1450 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1454 /**************************
1455 * CALCULATE INTERACTIONS *
1456 **************************/
1458 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1461 /* REACTION-FIELD ELECTROSTATICS */
1462 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1464 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1468 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1470 /* Update vectorial force */
1471 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1472 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1473 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1475 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1476 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1477 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1481 /**************************
1482 * CALCULATE INTERACTIONS *
1483 **************************/
1485 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1488 /* REACTION-FIELD ELECTROSTATICS */
1489 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1491 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1495 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1497 /* Update vectorial force */
1498 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1499 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1500 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1502 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1503 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1504 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1508 /**************************
1509 * CALCULATE INTERACTIONS *
1510 **************************/
1512 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1515 /* REACTION-FIELD ELECTROSTATICS */
1516 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1518 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1522 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1524 /* Update vectorial force */
1525 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1526 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1527 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1529 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1530 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1531 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1535 /**************************
1536 * CALCULATE INTERACTIONS *
1537 **************************/
1539 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1542 /* REACTION-FIELD ELECTROSTATICS */
1543 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1545 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1549 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1551 /* Update vectorial force */
1552 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1553 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1554 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1556 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1557 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1558 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1562 /**************************
1563 * CALCULATE INTERACTIONS *
1564 **************************/
1566 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1569 /* REACTION-FIELD ELECTROSTATICS */
1570 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1572 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1576 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1578 /* Update vectorial force */
1579 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1580 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1581 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1583 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1584 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1585 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1589 /**************************
1590 * CALCULATE INTERACTIONS *
1591 **************************/
1593 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1596 /* REACTION-FIELD ELECTROSTATICS */
1597 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1599 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1603 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1605 /* Update vectorial force */
1606 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1607 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1608 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1610 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1611 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1612 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1616 /**************************
1617 * CALCULATE INTERACTIONS *
1618 **************************/
1620 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1623 /* REACTION-FIELD ELECTROSTATICS */
1624 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1626 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1630 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1632 /* Update vectorial force */
1633 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1634 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1635 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1637 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1638 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1639 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1643 /**************************
1644 * CALCULATE INTERACTIONS *
1645 **************************/
1647 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1650 /* REACTION-FIELD ELECTROSTATICS */
1651 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1653 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1657 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1659 /* Update vectorial force */
1660 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1661 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1662 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1664 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1665 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1666 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1670 /**************************
1671 * CALCULATE INTERACTIONS *
1672 **************************/
1674 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1677 /* REACTION-FIELD ELECTROSTATICS */
1678 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1680 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1684 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1686 /* Update vectorial force */
1687 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1688 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1689 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1691 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1692 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1693 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1697 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1699 /* Inner loop uses 359 flops */
1702 if(jidx<j_index_end)
1706 j_coord_offsetA = DIM*jnrA;
1708 /* load j atom coordinates */
1709 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1710 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1711 &jy2,&jz2,&jx3,&jy3,&jz3);
1713 /* Calculate displacement vector */
1714 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1715 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1716 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1717 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1718 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1719 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1720 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1721 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1722 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1723 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1724 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1725 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1726 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1727 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1728 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1729 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1730 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1731 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1732 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1733 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1734 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1735 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1736 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1737 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1738 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1739 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1740 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1741 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1742 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1743 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1745 /* Calculate squared distance and things based on it */
1746 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1747 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1748 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1749 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1750 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1751 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1752 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1753 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1754 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1755 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1757 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1758 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1759 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1760 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1761 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1762 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1763 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1764 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1765 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1766 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1768 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1769 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1770 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1771 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1772 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1773 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1774 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1775 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1776 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1777 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1779 fjx0 = _fjsp_setzero_v2r8();
1780 fjy0 = _fjsp_setzero_v2r8();
1781 fjz0 = _fjsp_setzero_v2r8();
1782 fjx1 = _fjsp_setzero_v2r8();
1783 fjy1 = _fjsp_setzero_v2r8();
1784 fjz1 = _fjsp_setzero_v2r8();
1785 fjx2 = _fjsp_setzero_v2r8();
1786 fjy2 = _fjsp_setzero_v2r8();
1787 fjz2 = _fjsp_setzero_v2r8();
1788 fjx3 = _fjsp_setzero_v2r8();
1789 fjy3 = _fjsp_setzero_v2r8();
1790 fjz3 = _fjsp_setzero_v2r8();
1792 /**************************
1793 * CALCULATE INTERACTIONS *
1794 **************************/
1796 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1799 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1801 /* LENNARD-JONES DISPERSION/REPULSION */
1803 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1804 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
1805 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1806 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1807 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1809 d = _fjsp_sub_v2r8(r00,rswitch);
1810 d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1811 d2 = _fjsp_mul_v2r8(d,d);
1812 sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1814 dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1816 /* Evaluate switch function */
1817 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1818 fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1819 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1823 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1825 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1827 /* Update vectorial force */
1828 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1829 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1830 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1832 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1833 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1834 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1838 /**************************
1839 * CALCULATE INTERACTIONS *
1840 **************************/
1842 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1845 /* REACTION-FIELD ELECTROSTATICS */
1846 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1848 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1852 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1854 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1856 /* Update vectorial force */
1857 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1858 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1859 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1861 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1862 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1863 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1867 /**************************
1868 * CALCULATE INTERACTIONS *
1869 **************************/
1871 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1874 /* REACTION-FIELD ELECTROSTATICS */
1875 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1877 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1881 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1883 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1885 /* Update vectorial force */
1886 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1887 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1888 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1890 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1891 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1892 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1896 /**************************
1897 * CALCULATE INTERACTIONS *
1898 **************************/
1900 if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1903 /* REACTION-FIELD ELECTROSTATICS */
1904 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1906 cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1910 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1912 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1914 /* Update vectorial force */
1915 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1916 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1917 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1919 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1920 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1921 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1925 /**************************
1926 * CALCULATE INTERACTIONS *
1927 **************************/
1929 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1932 /* REACTION-FIELD ELECTROSTATICS */
1933 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1935 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1939 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1941 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1943 /* Update vectorial force */
1944 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1945 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1946 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1948 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1949 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1950 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1954 /**************************
1955 * CALCULATE INTERACTIONS *
1956 **************************/
1958 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1961 /* REACTION-FIELD ELECTROSTATICS */
1962 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1964 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1968 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1970 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1972 /* Update vectorial force */
1973 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1974 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1975 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1977 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1978 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1979 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1983 /**************************
1984 * CALCULATE INTERACTIONS *
1985 **************************/
1987 if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1990 /* REACTION-FIELD ELECTROSTATICS */
1991 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1993 cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1997 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1999 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2001 /* Update vectorial force */
2002 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
2003 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2004 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2006 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2007 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2008 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2012 /**************************
2013 * CALCULATE INTERACTIONS *
2014 **************************/
2016 if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
2019 /* REACTION-FIELD ELECTROSTATICS */
2020 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
2022 cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
2026 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2028 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2030 /* Update vectorial force */
2031 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
2032 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2033 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2035 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2036 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2037 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2041 /**************************
2042 * CALCULATE INTERACTIONS *
2043 **************************/
2045 if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
2048 /* REACTION-FIELD ELECTROSTATICS */
2049 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
2051 cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
2055 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2057 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2059 /* Update vectorial force */
2060 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
2061 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2062 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2064 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2065 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2066 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2070 /**************************
2071 * CALCULATE INTERACTIONS *
2072 **************************/
2074 if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
2077 /* REACTION-FIELD ELECTROSTATICS */
2078 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
2080 cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
2084 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2086 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2088 /* Update vectorial force */
2089 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
2090 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2091 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2093 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2094 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2095 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2099 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2101 /* Inner loop uses 359 flops */
2104 /* End of innermost loop */
2106 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2107 f+i_coord_offset,fshift+i_shift_offset);
2109 /* Increment number of inner iterations */
2110 inneriter += j_index_end - j_index_start;
2112 /* Outer loop uses 24 flops */
2115 /* Increment number of outer iterations */
2118 /* Update outer/inner flops */
2120 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*359);