2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: CubicSplineTable
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85 int vdwjidx0A,vdwjidx0B;
86 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87 int vdwjidx1A,vdwjidx1B;
88 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89 int vdwjidx2A,vdwjidx2B;
90 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
103 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
107 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
111 _fjsp_v2r8 dummy_mask,cutoff_mask;
112 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
113 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
114 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
121 jindex = nlist->jindex;
123 shiftidx = nlist->shift;
125 shiftvec = fr->shift_vec[0];
126 fshift = fr->fshift[0];
127 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
128 charge = mdatoms->chargeA;
129 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
130 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
131 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
132 nvdwtype = fr->ntype;
134 vdwtype = mdatoms->typeA;
136 vftab = kernel_data->table_vdw->data;
137 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
139 /* Setup water-specific parameters */
140 inr = nlist->iinr[0];
141 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
142 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
143 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
144 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
146 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
147 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
148 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
149 vdwjidx0A = 2*vdwtype[inr+0];
150 qq00 = _fjsp_mul_v2r8(iq0,jq0);
151 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
152 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
153 qq01 = _fjsp_mul_v2r8(iq0,jq1);
154 qq02 = _fjsp_mul_v2r8(iq0,jq2);
155 qq10 = _fjsp_mul_v2r8(iq1,jq0);
156 qq11 = _fjsp_mul_v2r8(iq1,jq1);
157 qq12 = _fjsp_mul_v2r8(iq1,jq2);
158 qq20 = _fjsp_mul_v2r8(iq2,jq0);
159 qq21 = _fjsp_mul_v2r8(iq2,jq1);
160 qq22 = _fjsp_mul_v2r8(iq2,jq2);
162 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
163 rcutoff_scalar = fr->rcoulomb;
164 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
165 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
167 /* Avoid stupid compiler warnings */
175 /* Start outer loop over neighborlists */
176 for(iidx=0; iidx<nri; iidx++)
178 /* Load shift vector for this list */
179 i_shift_offset = DIM*shiftidx[iidx];
181 /* Load limits for loop over neighbors */
182 j_index_start = jindex[iidx];
183 j_index_end = jindex[iidx+1];
185 /* Get outer coordinate index */
187 i_coord_offset = DIM*inr;
189 /* Load i particle coords and add shift vector */
190 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
191 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
193 fix0 = _fjsp_setzero_v2r8();
194 fiy0 = _fjsp_setzero_v2r8();
195 fiz0 = _fjsp_setzero_v2r8();
196 fix1 = _fjsp_setzero_v2r8();
197 fiy1 = _fjsp_setzero_v2r8();
198 fiz1 = _fjsp_setzero_v2r8();
199 fix2 = _fjsp_setzero_v2r8();
200 fiy2 = _fjsp_setzero_v2r8();
201 fiz2 = _fjsp_setzero_v2r8();
203 /* Reset potential sums */
204 velecsum = _fjsp_setzero_v2r8();
205 vvdwsum = _fjsp_setzero_v2r8();
207 /* Start inner kernel loop */
208 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
211 /* Get j neighbor index, and coordinate index */
214 j_coord_offsetA = DIM*jnrA;
215 j_coord_offsetB = DIM*jnrB;
217 /* load j atom coordinates */
218 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
219 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
221 /* Calculate displacement vector */
222 dx00 = _fjsp_sub_v2r8(ix0,jx0);
223 dy00 = _fjsp_sub_v2r8(iy0,jy0);
224 dz00 = _fjsp_sub_v2r8(iz0,jz0);
225 dx01 = _fjsp_sub_v2r8(ix0,jx1);
226 dy01 = _fjsp_sub_v2r8(iy0,jy1);
227 dz01 = _fjsp_sub_v2r8(iz0,jz1);
228 dx02 = _fjsp_sub_v2r8(ix0,jx2);
229 dy02 = _fjsp_sub_v2r8(iy0,jy2);
230 dz02 = _fjsp_sub_v2r8(iz0,jz2);
231 dx10 = _fjsp_sub_v2r8(ix1,jx0);
232 dy10 = _fjsp_sub_v2r8(iy1,jy0);
233 dz10 = _fjsp_sub_v2r8(iz1,jz0);
234 dx11 = _fjsp_sub_v2r8(ix1,jx1);
235 dy11 = _fjsp_sub_v2r8(iy1,jy1);
236 dz11 = _fjsp_sub_v2r8(iz1,jz1);
237 dx12 = _fjsp_sub_v2r8(ix1,jx2);
238 dy12 = _fjsp_sub_v2r8(iy1,jy2);
239 dz12 = _fjsp_sub_v2r8(iz1,jz2);
240 dx20 = _fjsp_sub_v2r8(ix2,jx0);
241 dy20 = _fjsp_sub_v2r8(iy2,jy0);
242 dz20 = _fjsp_sub_v2r8(iz2,jz0);
243 dx21 = _fjsp_sub_v2r8(ix2,jx1);
244 dy21 = _fjsp_sub_v2r8(iy2,jy1);
245 dz21 = _fjsp_sub_v2r8(iz2,jz1);
246 dx22 = _fjsp_sub_v2r8(ix2,jx2);
247 dy22 = _fjsp_sub_v2r8(iy2,jy2);
248 dz22 = _fjsp_sub_v2r8(iz2,jz2);
250 /* Calculate squared distance and things based on it */
251 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
252 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
253 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
254 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
255 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
256 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
257 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
258 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
259 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
261 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
262 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
263 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
264 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
265 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
266 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
267 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
268 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
269 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
271 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
272 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
273 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
274 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
275 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
276 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
277 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
278 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
279 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
281 fjx0 = _fjsp_setzero_v2r8();
282 fjy0 = _fjsp_setzero_v2r8();
283 fjz0 = _fjsp_setzero_v2r8();
284 fjx1 = _fjsp_setzero_v2r8();
285 fjy1 = _fjsp_setzero_v2r8();
286 fjz1 = _fjsp_setzero_v2r8();
287 fjx2 = _fjsp_setzero_v2r8();
288 fjy2 = _fjsp_setzero_v2r8();
289 fjz2 = _fjsp_setzero_v2r8();
291 /**************************
292 * CALCULATE INTERACTIONS *
293 **************************/
295 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
298 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
300 /* Calculate table index by multiplying r with table scale and truncate to integer */
301 rt = _fjsp_mul_v2r8(r00,vftabscale);
302 itab_tmp = _fjsp_dtox_v2r8(rt);
303 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
304 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
305 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
310 /* REACTION-FIELD ELECTROSTATICS */
311 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
312 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
314 /* CUBIC SPLINE TABLE DISPERSION */
315 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
316 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
317 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
318 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
319 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
320 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
321 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
322 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
323 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
324 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
325 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
327 /* CUBIC SPLINE TABLE REPULSION */
328 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
329 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
330 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
331 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
332 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
333 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
334 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
335 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
336 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
337 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
338 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
339 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
340 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
342 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
344 /* Update potential sum for this i atom from the interaction with this j atom. */
345 velec = _fjsp_and_v2r8(velec,cutoff_mask);
346 velecsum = _fjsp_add_v2r8(velecsum,velec);
347 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
348 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
350 fscal = _fjsp_add_v2r8(felec,fvdw);
352 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
354 /* Update vectorial force */
355 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
356 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
357 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
359 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
360 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
361 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
365 /**************************
366 * CALCULATE INTERACTIONS *
367 **************************/
369 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
372 /* REACTION-FIELD ELECTROSTATICS */
373 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
374 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
376 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
378 /* Update potential sum for this i atom from the interaction with this j atom. */
379 velec = _fjsp_and_v2r8(velec,cutoff_mask);
380 velecsum = _fjsp_add_v2r8(velecsum,velec);
384 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
386 /* Update vectorial force */
387 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
388 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
389 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
391 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
392 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
393 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
397 /**************************
398 * CALCULATE INTERACTIONS *
399 **************************/
401 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
404 /* REACTION-FIELD ELECTROSTATICS */
405 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
406 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
408 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
410 /* Update potential sum for this i atom from the interaction with this j atom. */
411 velec = _fjsp_and_v2r8(velec,cutoff_mask);
412 velecsum = _fjsp_add_v2r8(velecsum,velec);
416 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
418 /* Update vectorial force */
419 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
420 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
421 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
423 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
424 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
425 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
429 /**************************
430 * CALCULATE INTERACTIONS *
431 **************************/
433 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
436 /* REACTION-FIELD ELECTROSTATICS */
437 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
438 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
440 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
442 /* Update potential sum for this i atom from the interaction with this j atom. */
443 velec = _fjsp_and_v2r8(velec,cutoff_mask);
444 velecsum = _fjsp_add_v2r8(velecsum,velec);
448 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
450 /* Update vectorial force */
451 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
452 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
453 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
455 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
456 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
457 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
461 /**************************
462 * CALCULATE INTERACTIONS *
463 **************************/
465 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
468 /* REACTION-FIELD ELECTROSTATICS */
469 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
470 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
472 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
474 /* Update potential sum for this i atom from the interaction with this j atom. */
475 velec = _fjsp_and_v2r8(velec,cutoff_mask);
476 velecsum = _fjsp_add_v2r8(velecsum,velec);
480 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
482 /* Update vectorial force */
483 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
484 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
485 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
487 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
488 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
489 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
493 /**************************
494 * CALCULATE INTERACTIONS *
495 **************************/
497 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
500 /* REACTION-FIELD ELECTROSTATICS */
501 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
502 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
504 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
506 /* Update potential sum for this i atom from the interaction with this j atom. */
507 velec = _fjsp_and_v2r8(velec,cutoff_mask);
508 velecsum = _fjsp_add_v2r8(velecsum,velec);
512 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
514 /* Update vectorial force */
515 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
516 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
517 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
519 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
520 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
521 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
525 /**************************
526 * CALCULATE INTERACTIONS *
527 **************************/
529 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
532 /* REACTION-FIELD ELECTROSTATICS */
533 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
534 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
536 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
538 /* Update potential sum for this i atom from the interaction with this j atom. */
539 velec = _fjsp_and_v2r8(velec,cutoff_mask);
540 velecsum = _fjsp_add_v2r8(velecsum,velec);
544 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
546 /* Update vectorial force */
547 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
548 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
549 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
551 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
552 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
553 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
557 /**************************
558 * CALCULATE INTERACTIONS *
559 **************************/
561 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
564 /* REACTION-FIELD ELECTROSTATICS */
565 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
566 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
568 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
570 /* Update potential sum for this i atom from the interaction with this j atom. */
571 velec = _fjsp_and_v2r8(velec,cutoff_mask);
572 velecsum = _fjsp_add_v2r8(velecsum,velec);
576 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
578 /* Update vectorial force */
579 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
580 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
581 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
583 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
584 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
585 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
589 /**************************
590 * CALCULATE INTERACTIONS *
591 **************************/
593 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
596 /* REACTION-FIELD ELECTROSTATICS */
597 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
598 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
600 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
602 /* Update potential sum for this i atom from the interaction with this j atom. */
603 velec = _fjsp_and_v2r8(velec,cutoff_mask);
604 velecsum = _fjsp_add_v2r8(velecsum,velec);
608 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
610 /* Update vectorial force */
611 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
612 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
613 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
615 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
616 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
617 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
621 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
623 /* Inner loop uses 387 flops */
630 j_coord_offsetA = DIM*jnrA;
632 /* load j atom coordinates */
633 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
634 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
636 /* Calculate displacement vector */
637 dx00 = _fjsp_sub_v2r8(ix0,jx0);
638 dy00 = _fjsp_sub_v2r8(iy0,jy0);
639 dz00 = _fjsp_sub_v2r8(iz0,jz0);
640 dx01 = _fjsp_sub_v2r8(ix0,jx1);
641 dy01 = _fjsp_sub_v2r8(iy0,jy1);
642 dz01 = _fjsp_sub_v2r8(iz0,jz1);
643 dx02 = _fjsp_sub_v2r8(ix0,jx2);
644 dy02 = _fjsp_sub_v2r8(iy0,jy2);
645 dz02 = _fjsp_sub_v2r8(iz0,jz2);
646 dx10 = _fjsp_sub_v2r8(ix1,jx0);
647 dy10 = _fjsp_sub_v2r8(iy1,jy0);
648 dz10 = _fjsp_sub_v2r8(iz1,jz0);
649 dx11 = _fjsp_sub_v2r8(ix1,jx1);
650 dy11 = _fjsp_sub_v2r8(iy1,jy1);
651 dz11 = _fjsp_sub_v2r8(iz1,jz1);
652 dx12 = _fjsp_sub_v2r8(ix1,jx2);
653 dy12 = _fjsp_sub_v2r8(iy1,jy2);
654 dz12 = _fjsp_sub_v2r8(iz1,jz2);
655 dx20 = _fjsp_sub_v2r8(ix2,jx0);
656 dy20 = _fjsp_sub_v2r8(iy2,jy0);
657 dz20 = _fjsp_sub_v2r8(iz2,jz0);
658 dx21 = _fjsp_sub_v2r8(ix2,jx1);
659 dy21 = _fjsp_sub_v2r8(iy2,jy1);
660 dz21 = _fjsp_sub_v2r8(iz2,jz1);
661 dx22 = _fjsp_sub_v2r8(ix2,jx2);
662 dy22 = _fjsp_sub_v2r8(iy2,jy2);
663 dz22 = _fjsp_sub_v2r8(iz2,jz2);
665 /* Calculate squared distance and things based on it */
666 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
667 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
668 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
669 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
670 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
671 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
672 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
673 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
674 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
676 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
677 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
678 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
679 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
680 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
681 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
682 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
683 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
684 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
686 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
687 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
688 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
689 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
690 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
691 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
692 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
693 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
694 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
696 fjx0 = _fjsp_setzero_v2r8();
697 fjy0 = _fjsp_setzero_v2r8();
698 fjz0 = _fjsp_setzero_v2r8();
699 fjx1 = _fjsp_setzero_v2r8();
700 fjy1 = _fjsp_setzero_v2r8();
701 fjz1 = _fjsp_setzero_v2r8();
702 fjx2 = _fjsp_setzero_v2r8();
703 fjy2 = _fjsp_setzero_v2r8();
704 fjz2 = _fjsp_setzero_v2r8();
706 /**************************
707 * CALCULATE INTERACTIONS *
708 **************************/
710 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
713 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
715 /* Calculate table index by multiplying r with table scale and truncate to integer */
716 rt = _fjsp_mul_v2r8(r00,vftabscale);
717 itab_tmp = _fjsp_dtox_v2r8(rt);
718 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
719 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
720 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
725 /* REACTION-FIELD ELECTROSTATICS */
726 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
727 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
729 /* CUBIC SPLINE TABLE DISPERSION */
730 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
731 F = _fjsp_setzero_v2r8();
732 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
733 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
734 H = _fjsp_setzero_v2r8();
735 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
736 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
737 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
738 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
739 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
740 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
742 /* CUBIC SPLINE TABLE REPULSION */
743 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
744 F = _fjsp_setzero_v2r8();
745 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
746 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
747 H = _fjsp_setzero_v2r8();
748 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
749 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
750 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
751 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
752 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
753 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
754 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
755 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
757 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
759 /* Update potential sum for this i atom from the interaction with this j atom. */
760 velec = _fjsp_and_v2r8(velec,cutoff_mask);
761 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
762 velecsum = _fjsp_add_v2r8(velecsum,velec);
763 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
764 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
765 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
767 fscal = _fjsp_add_v2r8(felec,fvdw);
769 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
771 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
773 /* Update vectorial force */
774 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
775 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
776 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
778 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
779 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
780 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
784 /**************************
785 * CALCULATE INTERACTIONS *
786 **************************/
788 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
791 /* REACTION-FIELD ELECTROSTATICS */
792 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
793 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
795 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
797 /* Update potential sum for this i atom from the interaction with this j atom. */
798 velec = _fjsp_and_v2r8(velec,cutoff_mask);
799 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
800 velecsum = _fjsp_add_v2r8(velecsum,velec);
804 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
806 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
808 /* Update vectorial force */
809 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
810 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
811 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
813 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
814 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
815 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
819 /**************************
820 * CALCULATE INTERACTIONS *
821 **************************/
823 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
826 /* REACTION-FIELD ELECTROSTATICS */
827 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
828 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
830 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
832 /* Update potential sum for this i atom from the interaction with this j atom. */
833 velec = _fjsp_and_v2r8(velec,cutoff_mask);
834 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
835 velecsum = _fjsp_add_v2r8(velecsum,velec);
839 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
841 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
843 /* Update vectorial force */
844 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
845 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
846 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
848 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
849 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
850 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
854 /**************************
855 * CALCULATE INTERACTIONS *
856 **************************/
858 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
861 /* REACTION-FIELD ELECTROSTATICS */
862 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
863 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
865 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
867 /* Update potential sum for this i atom from the interaction with this j atom. */
868 velec = _fjsp_and_v2r8(velec,cutoff_mask);
869 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
870 velecsum = _fjsp_add_v2r8(velecsum,velec);
874 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
876 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
878 /* Update vectorial force */
879 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
880 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
881 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
883 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
884 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
885 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
889 /**************************
890 * CALCULATE INTERACTIONS *
891 **************************/
893 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
896 /* REACTION-FIELD ELECTROSTATICS */
897 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
898 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
900 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
902 /* Update potential sum for this i atom from the interaction with this j atom. */
903 velec = _fjsp_and_v2r8(velec,cutoff_mask);
904 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
905 velecsum = _fjsp_add_v2r8(velecsum,velec);
909 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
911 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
913 /* Update vectorial force */
914 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
915 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
916 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
918 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
919 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
920 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
924 /**************************
925 * CALCULATE INTERACTIONS *
926 **************************/
928 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
931 /* REACTION-FIELD ELECTROSTATICS */
932 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
933 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
935 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
937 /* Update potential sum for this i atom from the interaction with this j atom. */
938 velec = _fjsp_and_v2r8(velec,cutoff_mask);
939 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
940 velecsum = _fjsp_add_v2r8(velecsum,velec);
944 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
946 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
948 /* Update vectorial force */
949 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
950 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
951 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
953 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
954 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
955 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
959 /**************************
960 * CALCULATE INTERACTIONS *
961 **************************/
963 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
966 /* REACTION-FIELD ELECTROSTATICS */
967 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
968 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
970 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
972 /* Update potential sum for this i atom from the interaction with this j atom. */
973 velec = _fjsp_and_v2r8(velec,cutoff_mask);
974 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
975 velecsum = _fjsp_add_v2r8(velecsum,velec);
979 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
981 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
983 /* Update vectorial force */
984 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
985 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
986 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
988 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
989 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
990 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
994 /**************************
995 * CALCULATE INTERACTIONS *
996 **************************/
998 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1001 /* REACTION-FIELD ELECTROSTATICS */
1002 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
1003 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1005 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1007 /* Update potential sum for this i atom from the interaction with this j atom. */
1008 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1009 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1010 velecsum = _fjsp_add_v2r8(velecsum,velec);
1014 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1016 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1018 /* Update vectorial force */
1019 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1020 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1021 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1023 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1024 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1025 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1029 /**************************
1030 * CALCULATE INTERACTIONS *
1031 **************************/
1033 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1036 /* REACTION-FIELD ELECTROSTATICS */
1037 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
1038 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1040 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1042 /* Update potential sum for this i atom from the interaction with this j atom. */
1043 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1044 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1045 velecsum = _fjsp_add_v2r8(velecsum,velec);
1049 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1051 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1053 /* Update vectorial force */
1054 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1055 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1056 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1058 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1059 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1060 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1064 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1066 /* Inner loop uses 387 flops */
1069 /* End of innermost loop */
1071 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1072 f+i_coord_offset,fshift+i_shift_offset);
1075 /* Update potential energies */
1076 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1077 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1079 /* Increment number of inner iterations */
1080 inneriter += j_index_end - j_index_start;
1082 /* Outer loop uses 20 flops */
1085 /* Increment number of outer iterations */
1088 /* Update outer/inner flops */
1090 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387);
1093 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1094 * Electrostatics interaction: ReactionField
1095 * VdW interaction: CubicSplineTable
1096 * Geometry: Water3-Water3
1097 * Calculate force/pot: Force
1100 nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1101 (t_nblist * gmx_restrict nlist,
1102 rvec * gmx_restrict xx,
1103 rvec * gmx_restrict ff,
1104 t_forcerec * gmx_restrict fr,
1105 t_mdatoms * gmx_restrict mdatoms,
1106 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1107 t_nrnb * gmx_restrict nrnb)
1109 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1110 * just 0 for non-waters.
1111 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1112 * jnr indices corresponding to data put in the two positions in the SIMD register.
1114 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1115 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1117 int j_coord_offsetA,j_coord_offsetB;
1118 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1119 real rcutoff_scalar;
1120 real *shiftvec,*fshift,*x,*f;
1121 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1123 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1125 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1127 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1128 int vdwjidx0A,vdwjidx0B;
1129 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1130 int vdwjidx1A,vdwjidx1B;
1131 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1132 int vdwjidx2A,vdwjidx2B;
1133 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1134 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1135 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1136 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1137 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1138 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1139 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1140 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1141 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1142 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1143 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1146 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1149 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1150 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1151 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1153 _fjsp_v2r8 itab_tmp;
1154 _fjsp_v2r8 dummy_mask,cutoff_mask;
1155 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1156 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1157 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1164 jindex = nlist->jindex;
1166 shiftidx = nlist->shift;
1168 shiftvec = fr->shift_vec[0];
1169 fshift = fr->fshift[0];
1170 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1171 charge = mdatoms->chargeA;
1172 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1173 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1174 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1175 nvdwtype = fr->ntype;
1176 vdwparam = fr->nbfp;
1177 vdwtype = mdatoms->typeA;
1179 vftab = kernel_data->table_vdw->data;
1180 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1182 /* Setup water-specific parameters */
1183 inr = nlist->iinr[0];
1184 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1185 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1186 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1187 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1189 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1190 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1191 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1192 vdwjidx0A = 2*vdwtype[inr+0];
1193 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1194 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1195 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1196 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1197 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1198 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1199 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1200 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1201 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1202 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1203 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1205 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1206 rcutoff_scalar = fr->rcoulomb;
1207 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1208 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1210 /* Initialize the j-loop variables up front to silence compiler warnings */
1212 j_coord_offsetA = 0;
1213 j_coord_offsetB = 0;
1218 /* Start outer loop over neighborlists */
1219 for(iidx=0; iidx<nri; iidx++)
1221 /* Load shift vector for this list */
1222 i_shift_offset = DIM*shiftidx[iidx];
1224 /* Load limits for loop over neighbors */
1225 j_index_start = jindex[iidx];
1226 j_index_end = jindex[iidx+1];
1228 /* Get outer coordinate index */
1230 i_coord_offset = DIM*inr;
1232 /* Load i particle coords and add shift vector */
1233 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1234 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1236 fix0 = _fjsp_setzero_v2r8();
1237 fiy0 = _fjsp_setzero_v2r8();
1238 fiz0 = _fjsp_setzero_v2r8();
1239 fix1 = _fjsp_setzero_v2r8();
1240 fiy1 = _fjsp_setzero_v2r8();
1241 fiz1 = _fjsp_setzero_v2r8();
1242 fix2 = _fjsp_setzero_v2r8();
1243 fiy2 = _fjsp_setzero_v2r8();
1244 fiz2 = _fjsp_setzero_v2r8();
1246 /* Start inner kernel loop */
1247 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1250 /* Get j neighbor index, and coordinate index */
1252 jnrB = jjnr[jidx+1];
1253 j_coord_offsetA = DIM*jnrA;
1254 j_coord_offsetB = DIM*jnrB;
1256 /* load j atom coordinates */
1257 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1258 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1260 /* Calculate displacement vector */
1261 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1262 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1263 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1264 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1265 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1266 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1267 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1268 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1269 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1270 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1271 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1272 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1273 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1274 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1275 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1276 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1277 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1278 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1279 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1280 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1281 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1282 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1283 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1284 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1285 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1286 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1287 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1289 /* Calculate squared distance and things based on it */
1290 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1291 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1292 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1293 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1294 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1295 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1296 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1297 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1298 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1300 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1301 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1302 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1303 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1304 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1305 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1306 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1307 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1308 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1310 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1311 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1312 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1313 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1314 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1315 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1316 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1317 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1318 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1320 fjx0 = _fjsp_setzero_v2r8();
1321 fjy0 = _fjsp_setzero_v2r8();
1322 fjz0 = _fjsp_setzero_v2r8();
1323 fjx1 = _fjsp_setzero_v2r8();
1324 fjy1 = _fjsp_setzero_v2r8();
1325 fjz1 = _fjsp_setzero_v2r8();
1326 fjx2 = _fjsp_setzero_v2r8();
1327 fjy2 = _fjsp_setzero_v2r8();
1328 fjz2 = _fjsp_setzero_v2r8();
1330 /**************************
1331 * CALCULATE INTERACTIONS *
1332 **************************/
1334 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1337 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1339 /* Calculate table index by multiplying r with table scale and truncate to integer */
1340 rt = _fjsp_mul_v2r8(r00,vftabscale);
1341 itab_tmp = _fjsp_dtox_v2r8(rt);
1342 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1343 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1344 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1349 /* REACTION-FIELD ELECTROSTATICS */
1350 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1352 /* CUBIC SPLINE TABLE DISPERSION */
1353 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1354 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1355 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1356 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1357 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1358 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1359 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1360 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1361 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1363 /* CUBIC SPLINE TABLE REPULSION */
1364 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1365 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1366 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1367 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1368 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1369 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1370 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1371 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1372 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1373 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1375 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1377 fscal = _fjsp_add_v2r8(felec,fvdw);
1379 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1381 /* Update vectorial force */
1382 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1383 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1384 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1386 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1387 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1388 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1392 /**************************
1393 * CALCULATE INTERACTIONS *
1394 **************************/
1396 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1399 /* REACTION-FIELD ELECTROSTATICS */
1400 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1402 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1406 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1408 /* Update vectorial force */
1409 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1410 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1411 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1413 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1414 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1415 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1419 /**************************
1420 * CALCULATE INTERACTIONS *
1421 **************************/
1423 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1426 /* REACTION-FIELD ELECTROSTATICS */
1427 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1429 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1433 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1435 /* Update vectorial force */
1436 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1437 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1438 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1440 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1441 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1442 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1446 /**************************
1447 * CALCULATE INTERACTIONS *
1448 **************************/
1450 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1453 /* REACTION-FIELD ELECTROSTATICS */
1454 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1456 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1460 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1462 /* Update vectorial force */
1463 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1464 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1465 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1467 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1468 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1469 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1473 /**************************
1474 * CALCULATE INTERACTIONS *
1475 **************************/
1477 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1480 /* REACTION-FIELD ELECTROSTATICS */
1481 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1483 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1487 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1489 /* Update vectorial force */
1490 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1491 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1492 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1494 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1495 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1496 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1500 /**************************
1501 * CALCULATE INTERACTIONS *
1502 **************************/
1504 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1507 /* REACTION-FIELD ELECTROSTATICS */
1508 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1510 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1514 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1516 /* Update vectorial force */
1517 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1518 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1519 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1521 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1522 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1523 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1527 /**************************
1528 * CALCULATE INTERACTIONS *
1529 **************************/
1531 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1534 /* REACTION-FIELD ELECTROSTATICS */
1535 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1537 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1541 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1543 /* Update vectorial force */
1544 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1545 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1546 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1548 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1549 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1550 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1554 /**************************
1555 * CALCULATE INTERACTIONS *
1556 **************************/
1558 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1561 /* REACTION-FIELD ELECTROSTATICS */
1562 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1564 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1568 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1570 /* Update vectorial force */
1571 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1572 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1573 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1575 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1576 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1577 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1581 /**************************
1582 * CALCULATE INTERACTIONS *
1583 **************************/
1585 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1588 /* REACTION-FIELD ELECTROSTATICS */
1589 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1591 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1595 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1597 /* Update vectorial force */
1598 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1599 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1600 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1602 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1603 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1604 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1608 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1610 /* Inner loop uses 324 flops */
1613 if(jidx<j_index_end)
1617 j_coord_offsetA = DIM*jnrA;
1619 /* load j atom coordinates */
1620 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1621 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1623 /* Calculate displacement vector */
1624 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1625 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1626 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1627 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1628 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1629 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1630 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1631 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1632 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1633 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1634 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1635 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1636 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1637 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1638 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1639 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1640 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1641 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1642 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1643 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1644 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1645 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1646 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1647 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1648 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1649 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1650 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1652 /* Calculate squared distance and things based on it */
1653 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1654 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1655 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1656 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1657 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1658 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1659 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1660 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1661 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1663 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1664 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1665 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1666 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1667 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1668 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1669 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1670 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1671 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1673 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1674 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1675 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1676 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1677 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1678 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1679 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1680 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1681 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1683 fjx0 = _fjsp_setzero_v2r8();
1684 fjy0 = _fjsp_setzero_v2r8();
1685 fjz0 = _fjsp_setzero_v2r8();
1686 fjx1 = _fjsp_setzero_v2r8();
1687 fjy1 = _fjsp_setzero_v2r8();
1688 fjz1 = _fjsp_setzero_v2r8();
1689 fjx2 = _fjsp_setzero_v2r8();
1690 fjy2 = _fjsp_setzero_v2r8();
1691 fjz2 = _fjsp_setzero_v2r8();
1693 /**************************
1694 * CALCULATE INTERACTIONS *
1695 **************************/
1697 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1700 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1702 /* Calculate table index by multiplying r with table scale and truncate to integer */
1703 rt = _fjsp_mul_v2r8(r00,vftabscale);
1704 itab_tmp = _fjsp_dtox_v2r8(rt);
1705 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1706 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1707 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1712 /* REACTION-FIELD ELECTROSTATICS */
1713 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1715 /* CUBIC SPLINE TABLE DISPERSION */
1716 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1717 F = _fjsp_setzero_v2r8();
1718 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1719 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1720 H = _fjsp_setzero_v2r8();
1721 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1722 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1723 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1724 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1726 /* CUBIC SPLINE TABLE REPULSION */
1727 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1728 F = _fjsp_setzero_v2r8();
1729 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1730 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1731 H = _fjsp_setzero_v2r8();
1732 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1733 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1734 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1735 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1736 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1738 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1740 fscal = _fjsp_add_v2r8(felec,fvdw);
1742 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1744 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1746 /* Update vectorial force */
1747 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1748 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1749 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1751 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1752 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1753 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1757 /**************************
1758 * CALCULATE INTERACTIONS *
1759 **************************/
1761 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1764 /* REACTION-FIELD ELECTROSTATICS */
1765 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1767 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1771 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1773 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1775 /* Update vectorial force */
1776 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1777 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1778 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1780 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1781 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1782 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1786 /**************************
1787 * CALCULATE INTERACTIONS *
1788 **************************/
1790 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1793 /* REACTION-FIELD ELECTROSTATICS */
1794 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1796 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1800 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1802 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1804 /* Update vectorial force */
1805 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1806 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1807 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1809 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1810 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1811 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1815 /**************************
1816 * CALCULATE INTERACTIONS *
1817 **************************/
1819 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1822 /* REACTION-FIELD ELECTROSTATICS */
1823 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1825 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1829 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1831 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1833 /* Update vectorial force */
1834 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1835 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1836 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1838 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1839 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1840 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1844 /**************************
1845 * CALCULATE INTERACTIONS *
1846 **************************/
1848 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1851 /* REACTION-FIELD ELECTROSTATICS */
1852 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1854 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1858 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1860 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1862 /* Update vectorial force */
1863 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1864 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1865 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1867 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1868 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1869 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1873 /**************************
1874 * CALCULATE INTERACTIONS *
1875 **************************/
1877 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1880 /* REACTION-FIELD ELECTROSTATICS */
1881 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1883 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1887 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1889 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1891 /* Update vectorial force */
1892 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1893 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1894 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1896 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1897 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1898 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1902 /**************************
1903 * CALCULATE INTERACTIONS *
1904 **************************/
1906 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1909 /* REACTION-FIELD ELECTROSTATICS */
1910 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1912 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1916 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1918 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1920 /* Update vectorial force */
1921 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1922 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1923 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1925 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1926 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1927 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1931 /**************************
1932 * CALCULATE INTERACTIONS *
1933 **************************/
1935 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1938 /* REACTION-FIELD ELECTROSTATICS */
1939 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1941 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1945 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1947 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1949 /* Update vectorial force */
1950 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1951 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1952 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1954 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1955 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1956 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1960 /**************************
1961 * CALCULATE INTERACTIONS *
1962 **************************/
1964 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1967 /* REACTION-FIELD ELECTROSTATICS */
1968 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1970 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1974 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1976 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1978 /* Update vectorial force */
1979 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1980 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1981 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1983 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1984 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1985 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1989 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1991 /* Inner loop uses 324 flops */
1994 /* End of innermost loop */
1996 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1997 f+i_coord_offset,fshift+i_shift_offset);
1999 /* Increment number of inner iterations */
2000 inneriter += j_index_end - j_index_start;
2002 /* Outer loop uses 18 flops */
2005 /* Increment number of outer iterations */
2008 /* Update outer/inner flops */
2010 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*324);