2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: CubicSplineTable
53 * Geometry: Water4-Water4
54 * Calculate force/pot: PotentialAndForce
/*
 * Water4-Water4 nonbonded kernel computing both potential and force:
 * reaction-field electrostatics for the nine pairs between sites 1..3 and a
 * cubic-spline table for the single site-0/site-0 Van der Waals pair.
 *
 * Arguments, as far as their use is visible in this view:
 *   nlist       - neighbor list; jindex, shift and iinr[0] are read.
 *   xx, ff      - no direct use is visible; presumably the sources of the
 *                 local x and f pointers (TODO confirm).
 *   fr          - supplies shift_vec, fshift, epsfac, ic->k_rf, ic->c_rf, ntype.
 *   mdatoms     - supplies chargeA and typeA.
 *   kernel_data - supplies table_vdw (data, scale) and the energygrp_elec and
 *                 energygrp_vdw accumulators. NOTE(review): it is tagged
 *                 gmx_unused although it is read - confirm the tag is intended.
 *   nrnb        - handed to inc_nrnb() together with the flop estimate.
 *
 * The setup statements further down broadcast the scalar constants
 * (epsfac, k_rf, 2*k_rf, c_rf) and the table scale into SIMD variables
 * through gmx_fjsp_set1_v2r8().
 *
 * NOTE(review): the comment that starts at the Suffixes line has no visible
 * terminator before the Setup water-specific parameters comment, so a C lexer
 * would treat the declarations in between as comment text. Several names used
 * later (for example charge, vdwparam, vftab, itab_tmp, nvdwtype, vdwtype,
 * vdwioffset0, jnrA, jnrB) have no visible declaration, and others (nri, x, f,
 * ggid, outeriter) have no visible assignment. This view looks incomplete -
 * verify against the generator output before building.
 */
57 nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the four positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
86 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 int vdwjidx3A,vdwjidx3B;
94 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
95 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
96 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
97 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
98 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
99 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
100 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
101 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
102 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
103 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
104 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
105 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
108 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
112 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
113 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
116 _fjsp_v2r8 dummy_mask,cutoff_mask;
117 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
118 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
119 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
126 jindex = nlist->jindex;
128 shiftidx = nlist->shift;
130 shiftvec = fr->shift_vec[0];
131 fshift = fr->fshift[0];
132 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
133 charge = mdatoms->chargeA;
134 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
135 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
136 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
137 nvdwtype = fr->ntype;
139 vdwtype = mdatoms->typeA;
141 vftab = kernel_data->table_vdw->data;
142 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
144 /* Setup water-specific parameters */
/* All parameters below are read once, from the first i water in the list (iinr[0]). */
145 inr = nlist->iinr[0];
/* i-side charges of sites 1..3, pre-multiplied by facel. */
146 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
147 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
148 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
/* Base offset into vdwparam derived from the type of atom inr+0. */
149 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
/* j-side charges and the j type index are read from the same atoms inr+0..inr+3. */
151 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
152 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
153 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
154 vdwjidx0A = 2*vdwtype[inr+0];
/* c6 and c12 for the 0-0 pair come from two consecutive vdwparam entries. */
155 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
156 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
/* Charge products for the nine electrostatic site pairs (i site, j site). */
157 qq11 = _fjsp_mul_v2r8(iq1,jq1);
158 qq12 = _fjsp_mul_v2r8(iq1,jq2);
159 qq13 = _fjsp_mul_v2r8(iq1,jq3);
160 qq21 = _fjsp_mul_v2r8(iq2,jq1);
161 qq22 = _fjsp_mul_v2r8(iq2,jq2);
162 qq23 = _fjsp_mul_v2r8(iq2,jq3);
163 qq31 = _fjsp_mul_v2r8(iq3,jq1);
164 qq32 = _fjsp_mul_v2r8(iq3,jq2);
165 qq33 = _fjsp_mul_v2r8(iq3,jq3);
167 /* Avoid stupid compiler warnings */
/*
 * Outer-loop prologue: one pass per neighbor-list entry iidx in [0, nri).
 * Each pass selects the shift vector and the j range for the entry, loads the
 * shifted coordinates of the four i sites and clears the per-entry force and
 * potential accumulators.
 */
175 /* Start outer loop over neighborlists */
176 for(iidx=0; iidx<nri; iidx++)
178 /* Load shift vector for this list */
/* Offset into shiftvec and fshift: DIM reals per shift index. */
179 i_shift_offset = DIM*shiftidx[iidx];
181 /* Load limits for loop over neighbors */
/* j entries for this list occupy [jindex[iidx], jindex[iidx+1]). */
182 j_index_start = jindex[iidx];
183 j_index_end = jindex[iidx+1];
185 /* Get outer coordinate index */
/* NOTE(review): no per-entry assignment of inr is visible in this view, so */
/* inr still holds nlist->iinr[0] from the setup code - confirm against the */
/* generator output. */
187 i_coord_offset = DIM*inr;
189 /* Load i particle coords and add shift vector */
190 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
191 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
/* i force accumulators for the four sites start from zero for every list entry. */
193 fix0 = _fjsp_setzero_v2r8();
194 fiy0 = _fjsp_setzero_v2r8();
195 fiz0 = _fjsp_setzero_v2r8();
196 fix1 = _fjsp_setzero_v2r8();
197 fiy1 = _fjsp_setzero_v2r8();
198 fiz1 = _fjsp_setzero_v2r8();
199 fix2 = _fjsp_setzero_v2r8();
200 fiy2 = _fjsp_setzero_v2r8();
201 fiz2 = _fjsp_setzero_v2r8();
202 fix3 = _fjsp_setzero_v2r8();
203 fiy3 = _fjsp_setzero_v2r8();
204 fiz3 = _fjsp_setzero_v2r8();
206 /* Reset potential sums */
207 velecsum = _fjsp_setzero_v2r8();
208 vvdwsum = _fjsp_setzero_v2r8();
210 /* Start inner kernel loop */
/* Two j waters (A and B) are handled per pass. The bound j_index_end-1 stops */
/* before a possible odd last entry, which the code after this loop handles. */
211 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
214 /* Get j neighbor index, and coordinate index */
/* NOTE(review): the assignments of jnrA and jnrB are not visible in this view. */
217 j_coord_offsetA = DIM*jnrA;
218 j_coord_offsetB = DIM*jnrB;
220 /* load j atom coordinates */
221 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
222 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
223 &jy2,&jz2,&jx3,&jy3,&jz3);
225 /* Calculate displacement vector */
/* dXY is i site X minus j site Y; only 0-0 and the nine pairs among sites 1..3 are formed. */
226 dx00 = _fjsp_sub_v2r8(ix0,jx0);
227 dy00 = _fjsp_sub_v2r8(iy0,jy0);
228 dz00 = _fjsp_sub_v2r8(iz0,jz0);
229 dx11 = _fjsp_sub_v2r8(ix1,jx1);
230 dy11 = _fjsp_sub_v2r8(iy1,jy1);
231 dz11 = _fjsp_sub_v2r8(iz1,jz1);
232 dx12 = _fjsp_sub_v2r8(ix1,jx2);
233 dy12 = _fjsp_sub_v2r8(iy1,jy2);
234 dz12 = _fjsp_sub_v2r8(iz1,jz2);
235 dx13 = _fjsp_sub_v2r8(ix1,jx3);
236 dy13 = _fjsp_sub_v2r8(iy1,jy3);
237 dz13 = _fjsp_sub_v2r8(iz1,jz3);
238 dx21 = _fjsp_sub_v2r8(ix2,jx1);
239 dy21 = _fjsp_sub_v2r8(iy2,jy1);
240 dz21 = _fjsp_sub_v2r8(iz2,jz1);
241 dx22 = _fjsp_sub_v2r8(ix2,jx2);
242 dy22 = _fjsp_sub_v2r8(iy2,jy2);
243 dz22 = _fjsp_sub_v2r8(iz2,jz2);
244 dx23 = _fjsp_sub_v2r8(ix2,jx3);
245 dy23 = _fjsp_sub_v2r8(iy2,jy3);
246 dz23 = _fjsp_sub_v2r8(iz2,jz3);
247 dx31 = _fjsp_sub_v2r8(ix3,jx1);
248 dy31 = _fjsp_sub_v2r8(iy3,jy1);
249 dz31 = _fjsp_sub_v2r8(iz3,jz1);
250 dx32 = _fjsp_sub_v2r8(ix3,jx2);
251 dy32 = _fjsp_sub_v2r8(iy3,jy2);
252 dz32 = _fjsp_sub_v2r8(iz3,jz2);
253 dx33 = _fjsp_sub_v2r8(ix3,jx3);
254 dy33 = _fjsp_sub_v2r8(iy3,jy3);
255 dz33 = _fjsp_sub_v2r8(iz3,jz3);
257 /* Calculate squared distance and things based on it */
258 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
259 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
260 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
261 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
262 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
263 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
264 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
265 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
266 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
267 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
269 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
270 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
271 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
272 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
273 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
274 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
275 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
276 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
277 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
278 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
/* rinvsq is formed only for the electrostatic pairs; pair 00 goes through the table. */
280 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
281 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
282 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
283 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
284 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
285 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
286 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
287 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
288 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
/* j force accumulators start from zero for every pair of j waters. */
290 fjx0 = _fjsp_setzero_v2r8();
291 fjy0 = _fjsp_setzero_v2r8();
292 fjz0 = _fjsp_setzero_v2r8();
293 fjx1 = _fjsp_setzero_v2r8();
294 fjy1 = _fjsp_setzero_v2r8();
295 fjz1 = _fjsp_setzero_v2r8();
296 fjx2 = _fjsp_setzero_v2r8();
297 fjy2 = _fjsp_setzero_v2r8();
298 fjz2 = _fjsp_setzero_v2r8();
299 fjx3 = _fjsp_setzero_v2r8();
300 fjy3 = _fjsp_setzero_v2r8();
301 fjz3 = _fjsp_setzero_v2r8();
303 /**************************
304 * CALCULATE INTERACTIONS *
305 **************************/
/* Pair 00: tabulated Van der Waals only. r is recovered as rsq*rinv. */
307 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
309 /* Calculate table index by multiplying r with table scale and truncate to integer */
/* vfeps is the fractional part of the scaled distance within the table interval. */
310 rt = _fjsp_mul_v2r8(r00,vftabscale);
311 itab_tmp = _fjsp_dtox_v2r8(rt);
312 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
313 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
314 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
/* NOTE(review): vfconv.i[] is used below as an offset into vftab exactly as */
/* stored; no scaling to the table stride is visible in this view. The +0/+2 */
/* and +4/+6 offsets suggest dispersion and repulsion coefficients are */
/* interleaved per table point - confirm against the generator output. */
319 /* CUBIC SPLINE TABLE DISPERSION */
320 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
321 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
322 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
323 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
324 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
325 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
/* Assuming _fjsp_madd_v2r8(a,b,c) computes a*b+c (TODO confirm): */
/* Fp = F + eps*(G + eps*H), VV = Y + eps*Fp, FF = Fp + eps*(G + 2*eps*H). */
326 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
327 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
328 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
329 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
330 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
332 /* CUBIC SPLINE TABLE REPULSION */
333 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
334 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
335 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
336 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
337 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
338 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
339 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
340 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
341 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
342 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
343 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
/* Potential is the sum of both terms; force scalar is -(fvdw6+fvdw12)*vftabscale*rinv00. */
344 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
345 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
347 /* Update potential sum for this i atom from the interaction with this j atom. */
348 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
352 /* Update vectorial force */
/* NOTE(review): fscal is consumed here and in every pair below, but no */
/* assignment of fvdw or felec to fscal is visible in this view - confirm */
/* against the generator output. */
353 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
354 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
355 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
357 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
358 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
359 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
361 /**************************
362 * CALCULATE INTERACTIONS *
363 **************************/
/* Pair 11. Assuming _fjsp_msub_v2r8(a,b,c) computes a*b-c (TODO confirm): */
/* velec = qq*(krf*rsq + rinv - crf), felec = qq*(rinv*rinvsq - krf2). */
/* The remaining eight electrostatic pairs repeat this pattern. */
365 /* REACTION-FIELD ELECTROSTATICS */
366 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
367 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
369 /* Update potential sum for this i atom from the interaction with this j atom. */
370 velecsum = _fjsp_add_v2r8(velecsum,velec);
374 /* Update vectorial force */
375 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
376 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
377 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
379 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
380 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
381 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
383 /**************************
384 * CALCULATE INTERACTIONS *
385 **************************/
/* Pair 12: i site 1 with j site 2. */
387 /* REACTION-FIELD ELECTROSTATICS */
388 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
389 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
391 /* Update potential sum for this i atom from the interaction with this j atom. */
392 velecsum = _fjsp_add_v2r8(velecsum,velec);
396 /* Update vectorial force */
397 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
398 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
399 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
401 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
402 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
403 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
405 /**************************
406 * CALCULATE INTERACTIONS *
407 **************************/
/* Pair 13: i site 1 with j site 3. */
409 /* REACTION-FIELD ELECTROSTATICS */
410 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
411 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
413 /* Update potential sum for this i atom from the interaction with this j atom. */
414 velecsum = _fjsp_add_v2r8(velecsum,velec);
418 /* Update vectorial force */
419 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
420 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
421 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
423 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
424 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
425 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
427 /**************************
428 * CALCULATE INTERACTIONS *
429 **************************/
/* Pair 21: i site 2 with j site 1. */
431 /* REACTION-FIELD ELECTROSTATICS */
432 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
433 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
435 /* Update potential sum for this i atom from the interaction with this j atom. */
436 velecsum = _fjsp_add_v2r8(velecsum,velec);
440 /* Update vectorial force */
441 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
442 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
443 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
445 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
446 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
447 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
449 /**************************
450 * CALCULATE INTERACTIONS *
451 **************************/
/* Pair 22: i site 2 with j site 2. */
453 /* REACTION-FIELD ELECTROSTATICS */
454 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
455 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
457 /* Update potential sum for this i atom from the interaction with this j atom. */
458 velecsum = _fjsp_add_v2r8(velecsum,velec);
462 /* Update vectorial force */
463 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
464 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
465 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
467 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
468 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
469 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
471 /**************************
472 * CALCULATE INTERACTIONS *
473 **************************/
/* Pair 23: i site 2 with j site 3. */
475 /* REACTION-FIELD ELECTROSTATICS */
476 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
477 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
479 /* Update potential sum for this i atom from the interaction with this j atom. */
480 velecsum = _fjsp_add_v2r8(velecsum,velec);
484 /* Update vectorial force */
485 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
486 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
487 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
489 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
490 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
491 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
493 /**************************
494 * CALCULATE INTERACTIONS *
495 **************************/
/* Pair 31: i site 3 with j site 1. */
497 /* REACTION-FIELD ELECTROSTATICS */
498 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
499 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
501 /* Update potential sum for this i atom from the interaction with this j atom. */
502 velecsum = _fjsp_add_v2r8(velecsum,velec);
506 /* Update vectorial force */
507 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
508 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
509 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
511 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
512 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
513 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
515 /**************************
516 * CALCULATE INTERACTIONS *
517 **************************/
/* Pair 32: i site 3 with j site 2. */
519 /* REACTION-FIELD ELECTROSTATICS */
520 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
521 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
523 /* Update potential sum for this i atom from the interaction with this j atom. */
524 velecsum = _fjsp_add_v2r8(velecsum,velec);
528 /* Update vectorial force */
529 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
530 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
531 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
533 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
534 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
535 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
537 /**************************
538 * CALCULATE INTERACTIONS *
539 **************************/
/* Pair 33: i site 3 with j site 3. */
541 /* REACTION-FIELD ELECTROSTATICS */
542 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
543 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
545 /* Update potential sum for this i atom from the interaction with this j atom. */
546 velecsum = _fjsp_add_v2r8(velecsum,velec);
550 /* Update vectorial force */
551 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
552 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
553 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
555 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
556 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
557 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
/* Hand the accumulated j forces for both waters to the helper. The i and j */
/* accumulators above gather the same sign, so the helper presumably subtracts */
/* them from f, as its name suggests - TODO confirm. */
559 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
561 /* Inner loop uses 377 flops */
/*
 * Remainder path for a single j water: same ten pair interactions as the
 * two-water loop, but only jnrA is used, only one table row is loaded, and
 * each potential and force scalar is passed through _fjsp_unpacklo_v2r8 with
 * a zero register before it is accumulated.
 * NOTE(review): neither the guard that selects this path nor the assignment
 * of jnrA is visible in this view - confirm against the generator output.
 */
568 j_coord_offsetA = DIM*jnrA;
570 /* load j atom coordinates */
571 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
572 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
573 &jy2,&jz2,&jx3,&jy3,&jz3);
575 /* Calculate displacement vector */
576 dx00 = _fjsp_sub_v2r8(ix0,jx0);
577 dy00 = _fjsp_sub_v2r8(iy0,jy0);
578 dz00 = _fjsp_sub_v2r8(iz0,jz0);
579 dx11 = _fjsp_sub_v2r8(ix1,jx1);
580 dy11 = _fjsp_sub_v2r8(iy1,jy1);
581 dz11 = _fjsp_sub_v2r8(iz1,jz1);
582 dx12 = _fjsp_sub_v2r8(ix1,jx2);
583 dy12 = _fjsp_sub_v2r8(iy1,jy2);
584 dz12 = _fjsp_sub_v2r8(iz1,jz2);
585 dx13 = _fjsp_sub_v2r8(ix1,jx3);
586 dy13 = _fjsp_sub_v2r8(iy1,jy3);
587 dz13 = _fjsp_sub_v2r8(iz1,jz3);
588 dx21 = _fjsp_sub_v2r8(ix2,jx1);
589 dy21 = _fjsp_sub_v2r8(iy2,jy1);
590 dz21 = _fjsp_sub_v2r8(iz2,jz1);
591 dx22 = _fjsp_sub_v2r8(ix2,jx2);
592 dy22 = _fjsp_sub_v2r8(iy2,jy2);
593 dz22 = _fjsp_sub_v2r8(iz2,jz2);
594 dx23 = _fjsp_sub_v2r8(ix2,jx3);
595 dy23 = _fjsp_sub_v2r8(iy2,jy3);
596 dz23 = _fjsp_sub_v2r8(iz2,jz3);
597 dx31 = _fjsp_sub_v2r8(ix3,jx1);
598 dy31 = _fjsp_sub_v2r8(iy3,jy1);
599 dz31 = _fjsp_sub_v2r8(iz3,jz1);
600 dx32 = _fjsp_sub_v2r8(ix3,jx2);
601 dy32 = _fjsp_sub_v2r8(iy3,jy2);
602 dz32 = _fjsp_sub_v2r8(iz3,jz2);
603 dx33 = _fjsp_sub_v2r8(ix3,jx3);
604 dy33 = _fjsp_sub_v2r8(iy3,jy3);
605 dz33 = _fjsp_sub_v2r8(iz3,jz3);
607 /* Calculate squared distance and things based on it */
608 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
609 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
610 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
611 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
612 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
613 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
614 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
615 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
616 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
617 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
619 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
620 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
621 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
622 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
623 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
624 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
625 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
626 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
627 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
628 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
630 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
631 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
632 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
633 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
634 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
635 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
636 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
637 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
638 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
640 fjx0 = _fjsp_setzero_v2r8();
641 fjy0 = _fjsp_setzero_v2r8();
642 fjz0 = _fjsp_setzero_v2r8();
643 fjx1 = _fjsp_setzero_v2r8();
644 fjy1 = _fjsp_setzero_v2r8();
645 fjz1 = _fjsp_setzero_v2r8();
646 fjx2 = _fjsp_setzero_v2r8();
647 fjy2 = _fjsp_setzero_v2r8();
648 fjz2 = _fjsp_setzero_v2r8();
649 fjx3 = _fjsp_setzero_v2r8();
650 fjy3 = _fjsp_setzero_v2r8();
651 fjz3 = _fjsp_setzero_v2r8();
653 /**************************
654 * CALCULATE INTERACTIONS *
655 **************************/
/* Pair 00: tabulated Van der Waals only. */
657 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
659 /* Calculate table index by multiplying r with table scale and truncate to integer */
660 rt = _fjsp_mul_v2r8(r00,vftabscale);
661 itab_tmp = _fjsp_dtox_v2r8(rt);
662 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
663 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
664 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
/* Only the row addressed by vfconv.i[0] is loaded; the partner register is */
/* zero before each transpose. NOTE(review): as in the two-water loop, no */
/* scaling of vfconv.i[] to the table stride is visible in this view. */
669 /* CUBIC SPLINE TABLE DISPERSION */
670 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
671 F = _fjsp_setzero_v2r8();
672 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
673 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
674 H = _fjsp_setzero_v2r8();
675 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
676 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
677 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
678 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
679 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
680 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
682 /* CUBIC SPLINE TABLE REPULSION */
683 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
684 F = _fjsp_setzero_v2r8();
685 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
686 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
687 H = _fjsp_setzero_v2r8();
688 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
689 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
690 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
691 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
692 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
693 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
694 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
695 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
697 /* Update potential sum for this i atom from the interaction with this j atom. */
/* Presumably keeps the first SIMD element and zeroes the second so the unused */
/* lane adds nothing to the sum - confirm _fjsp_unpacklo_v2r8 semantics. */
698 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
699 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
/* NOTE(review): fscal is only masked here and in the pairs below; no */
/* assignment from fvdw or felec is visible in this view. */
703 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
705 /* Update vectorial force */
706 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
707 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
708 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
710 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
711 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
712 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
714 /**************************
715 * CALCULATE INTERACTIONS *
716 **************************/
/* Pair 11; the remaining eight electrostatic pairs repeat this pattern. */
718 /* REACTION-FIELD ELECTROSTATICS */
719 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
720 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
722 /* Update potential sum for this i atom from the interaction with this j atom. */
723 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
724 velecsum = _fjsp_add_v2r8(velecsum,velec);
728 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
730 /* Update vectorial force */
731 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
732 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
733 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
735 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
736 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
737 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
739 /**************************
740 * CALCULATE INTERACTIONS *
741 **************************/
/* Pair 12: i site 1 with j site 2. */
743 /* REACTION-FIELD ELECTROSTATICS */
744 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
745 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
747 /* Update potential sum for this i atom from the interaction with this j atom. */
748 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
749 velecsum = _fjsp_add_v2r8(velecsum,velec);
753 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
755 /* Update vectorial force */
756 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
757 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
758 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
760 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
761 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
762 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
764 /**************************
765 * CALCULATE INTERACTIONS *
766 **************************/
/* Pair 13: i site 1 with j site 3. */
768 /* REACTION-FIELD ELECTROSTATICS */
769 velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
770 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
772 /* Update potential sum for this i atom from the interaction with this j atom. */
773 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
774 velecsum = _fjsp_add_v2r8(velecsum,velec);
778 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
780 /* Update vectorial force */
781 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
782 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
783 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
785 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
786 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
787 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
789 /**************************
790 * CALCULATE INTERACTIONS *
791 **************************/
/* Pair 21: i site 2 with j site 1. */
793 /* REACTION-FIELD ELECTROSTATICS */
794 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
795 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
797 /* Update potential sum for this i atom from the interaction with this j atom. */
798 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
799 velecsum = _fjsp_add_v2r8(velecsum,velec);
803 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
805 /* Update vectorial force */
806 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
807 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
808 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
810 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
811 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
812 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
814 /**************************
815 * CALCULATE INTERACTIONS *
816 **************************/
/* Pair 22: i site 2 with j site 2. */
818 /* REACTION-FIELD ELECTROSTATICS */
819 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
820 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
822 /* Update potential sum for this i atom from the interaction with this j atom. */
823 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
824 velecsum = _fjsp_add_v2r8(velecsum,velec);
828 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
830 /* Update vectorial force */
831 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
832 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
833 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
835 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
836 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
837 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
839 /**************************
840 * CALCULATE INTERACTIONS *
841 **************************/
/* Pair 23: i site 2 with j site 3. */
843 /* REACTION-FIELD ELECTROSTATICS */
844 velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
845 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
847 /* Update potential sum for this i atom from the interaction with this j atom. */
848 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
849 velecsum = _fjsp_add_v2r8(velecsum,velec);
853 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
855 /* Update vectorial force */
856 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
857 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
858 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
860 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
861 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
862 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
864 /**************************
865 * CALCULATE INTERACTIONS *
866 **************************/
/* Pair 31: i site 3 with j site 1. */
868 /* REACTION-FIELD ELECTROSTATICS */
869 velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
870 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
872 /* Update potential sum for this i atom from the interaction with this j atom. */
873 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
874 velecsum = _fjsp_add_v2r8(velecsum,velec);
878 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
880 /* Update vectorial force */
881 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
882 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
883 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
885 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
886 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
887 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
889 /**************************
890 * CALCULATE INTERACTIONS *
891 **************************/
/* Pair 32: i site 3 with j site 2. */
893 /* REACTION-FIELD ELECTROSTATICS */
894 velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
895 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
897 /* Update potential sum for this i atom from the interaction with this j atom. */
898 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
899 velecsum = _fjsp_add_v2r8(velecsum,velec);
903 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
905 /* Update vectorial force */
906 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
907 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
908 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
910 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
911 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
912 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
914 /**************************
915 * CALCULATE INTERACTIONS *
916 **************************/
/* Pair 33: i site 3 with j site 3. */
918 /* REACTION-FIELD ELECTROSTATICS */
919 velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
920 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
922 /* Update potential sum for this i atom from the interaction with this j atom. */
923 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
924 velecsum = _fjsp_add_v2r8(velecsum,velec);
928 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
930 /* Update vectorial force */
931 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
932 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
933 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
935 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
936 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
937 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
/* Single-pointer variant of the j force write-back used in the two-water loop. */
939 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
941 /* Inner loop uses 377 flops */
944 /* End of innermost loop */
946 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
947 f+i_coord_offset,fshift+i_shift_offset);
950 /* Update potential energies */
951 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
952 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
954 /* Increment number of inner iterations */
955 inneriter += j_index_end - j_index_start;
957 /* Outer loop uses 26 flops */
960 /* Increment number of outer iterations */
963 /* Update outer/inner flops */
965 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*377);
968 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
969 * Electrostatics interaction: ReactionField
970 * VdW interaction: CubicSplineTable
971 * Geometry: Water4-Water4
972 * Calculate force/pot: Force
/*
 * Force-only nonbonded kernel for water4-water4 pairs, written with the
 * two-element double precision SIMD type _fjsp_v2r8.
 *
 * What the visible code does:
 *  - Site pair 0-0 receives a tabulated Van der Waals force. Dispersion and
 *    repulsion entries are read from kernel_data->table_vdw->data and scaled
 *    by c6_00 and c12_00.
 *  - The nine site pairs (1..3) x (1..3) receive a reaction-field force
 *    computed as qq * _fjsp_msub_v2r8(rinv, rinvsq, krf2); presumably this is
 *    qq*(rinv*rinvsq - krf2) - confirm against the intrinsic reference.
 *  - Forces on the i water are accumulated in fix0..fiz3 and passed, together
 *    with fshift, to gmx_fjsp_update_iforce_4atom_swizzle_v2r8. Forces on the
 *    j waters are passed to the gmx_fjsp_decrement_4rvec_1ptr/2ptr helpers.
 *  - No gmx_fjsp_update_1pot_v2r8 call appears in this function, so no energy
 *    sums are written here.
 *  - Flops are reported through inc_nrnb as outeriter*24 + inneriter*324.
 *
 * Parameters:
 *  nlist        neighbor list; jindex, shift and iinr[0] are read below.
 *  xx, ff       coordinate and force arrays. The body works on x and f; their
 *               assignment from xx and ff is not visible in this listing.
 *  fr           force record; shift_vec, fshift, epsfac, ic->k_rf, ic->c_rf,
 *               ntype and nbfp are read.
 *  mdatoms      chargeA and typeA are read.
 *  kernel_data  table_vdw (data and scale) is read.
 *  nrnb         flop counter updated by the final inc_nrnb call.
 *
 * NOTE(review): several names (for example fscal, jnrA, nri, x, f, jjnr) are
 * read without a visible assignment, and no block braces are visible in this
 * listing. Statements appear to have been elided; confirm against the complete
 * generated file before relying on this text.
 * NOTE(review): the in-body comment on suffixes A,B mentions four positions in
 * the SIMD register, but only two j offsets (A and B) and a two-element index
 * union are used here.
 */
975 nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
976 (t_nblist * gmx_restrict nlist,
977 rvec * gmx_restrict xx,
978 rvec * gmx_restrict ff,
979 t_forcerec * gmx_restrict fr,
980 t_mdatoms * gmx_restrict mdatoms,
981 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
982 t_nrnb * gmx_restrict nrnb)
984 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
985 * just 0 for non-waters.
986 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
987 * jnr indices corresponding to data put in the four positions in the SIMD register.
989 int i_shift_offset,i_coord_offset,outeriter,inneriter;
990 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
992 int j_coord_offsetA,j_coord_offsetB;
993 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
995 real *shiftvec,*fshift,*x,*f;
996 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
998 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1000 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1002 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1004 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1005 int vdwjidx0A,vdwjidx0B;
1006 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1007 int vdwjidx1A,vdwjidx1B;
1008 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1009 int vdwjidx2A,vdwjidx2B;
1010 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1011 int vdwjidx3A,vdwjidx3B;
1012 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1013 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1014 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1015 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1016 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1017 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1018 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1019 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1020 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1021 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1022 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1023 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1026 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1029 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1030 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1031 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1033 _fjsp_v2r8 itab_tmp;
1034 _fjsp_v2r8 dummy_mask,cutoff_mask;
1035 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1036 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1037 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1044 jindex = nlist->jindex;
1046 shiftidx = nlist->shift;
1048 shiftvec = fr->shift_vec[0];
1049 fshift = fr->fshift[0];
1050 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1051 charge = mdatoms->chargeA;
1052 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1053 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1054 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1055 nvdwtype = fr->ntype;
1056 vdwparam = fr->nbfp;
1057 vdwtype = mdatoms->typeA;
1059 vftab = kernel_data->table_vdw->data;
1060 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1062 /* Setup water-specific parameters */
1063 inr = nlist->iinr[0];
1064 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1065 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1066 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1067 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
/* The j-water charges and VdW type are read from the same atoms (inr) as the
 * i water, so the charge products and c6/c12 below are computed once, before
 * both loops. */
1069 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1070 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1071 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
1072 vdwjidx0A = 2*vdwtype[inr+0];
1073 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1074 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1075 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1076 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1077 qq13 = _fjsp_mul_v2r8(iq1,jq3);
1078 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1079 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1080 qq23 = _fjsp_mul_v2r8(iq2,jq3);
1081 qq31 = _fjsp_mul_v2r8(iq3,jq1);
1082 qq32 = _fjsp_mul_v2r8(iq3,jq2);
1083 qq33 = _fjsp_mul_v2r8(iq3,jq3);
1085 /* Avoid stupid compiler warnings */
1087 j_coord_offsetA = 0;
1088 j_coord_offsetB = 0;
1093 /* Start outer loop over neighborlists */
1094 for(iidx=0; iidx<nri; iidx++)
1096 /* Load shift vector for this list */
1097 i_shift_offset = DIM*shiftidx[iidx];
1099 /* Load limits for loop over neighbors */
1100 j_index_start = jindex[iidx];
1101 j_index_end = jindex[iidx+1];
1103 /* Get outer coordinate index */
1105 i_coord_offset = DIM*inr;
1107 /* Load i particle coords and add shift vector */
1108 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1109 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1111 fix0 = _fjsp_setzero_v2r8();
1112 fiy0 = _fjsp_setzero_v2r8();
1113 fiz0 = _fjsp_setzero_v2r8();
1114 fix1 = _fjsp_setzero_v2r8();
1115 fiy1 = _fjsp_setzero_v2r8();
1116 fiz1 = _fjsp_setzero_v2r8();
1117 fix2 = _fjsp_setzero_v2r8();
1118 fiy2 = _fjsp_setzero_v2r8();
1119 fiz2 = _fjsp_setzero_v2r8();
1120 fix3 = _fjsp_setzero_v2r8();
1121 fiy3 = _fjsp_setzero_v2r8();
1122 fiz3 = _fjsp_setzero_v2r8();
1124 /* Start inner kernel loop */
/* Two j entries (A and B) are consumed per iteration. The loop condition
 * leaves a possible single trailing entry for the block after the loop. */
1125 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1128 /* Get j neighbor index, and coordinate index */
1130 jnrB = jjnr[jidx+1];
1131 j_coord_offsetA = DIM*jnrA;
1132 j_coord_offsetB = DIM*jnrB;
1134 /* load j atom coordinates */
1135 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1136 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1137 &jy2,&jz2,&jx3,&jy3,&jz3);
1139 /* Calculate displacement vector */
1140 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1141 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1142 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1143 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1144 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1145 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1146 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1147 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1148 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1149 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1150 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1151 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1152 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1153 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1154 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1155 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1156 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1157 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1158 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1159 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1160 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1161 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1162 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1163 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1164 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1165 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1166 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1167 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1168 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1169 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1171 /* Calculate squared distance and things based on it */
1172 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1173 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1174 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1175 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1176 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1177 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1178 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1179 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1180 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1181 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1183 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1184 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1185 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1186 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1187 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1188 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1189 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1190 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1191 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1192 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1194 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1195 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1196 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1197 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1198 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1199 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1200 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1201 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1202 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1204 fjx0 = _fjsp_setzero_v2r8();
1205 fjy0 = _fjsp_setzero_v2r8();
1206 fjz0 = _fjsp_setzero_v2r8();
1207 fjx1 = _fjsp_setzero_v2r8();
1208 fjy1 = _fjsp_setzero_v2r8();
1209 fjz1 = _fjsp_setzero_v2r8();
1210 fjx2 = _fjsp_setzero_v2r8();
1211 fjy2 = _fjsp_setzero_v2r8();
1212 fjz2 = _fjsp_setzero_v2r8();
1213 fjx3 = _fjsp_setzero_v2r8();
1214 fjy3 = _fjsp_setzero_v2r8();
1215 fjz3 = _fjsp_setzero_v2r8();
1217 /**************************
1218 * CALCULATE INTERACTIONS *
1219 **************************/
/* Site pair 0-0: tabulated Van der Waals force only. */
1221 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1223 /* Calculate table index by multiplying r with table scale and truncate to integer */
1224 rt = _fjsp_mul_v2r8(r00,vftabscale);
1225 itab_tmp = _fjsp_dtox_v2r8(rt);
1226 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1227 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1228 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
/* vfconv aliases the SIMD value with two integers; i[0] and i[1] are used
 * below as the per-lane offsets into vftab. */
1233 /* CUBIC SPLINE TABLE DISPERSION */
1234 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1235 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1236 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1237 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1238 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1239 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1240 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1241 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1242 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1244 /* CUBIC SPLINE TABLE REPULSION */
1245 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1246 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1247 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1248 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1249 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1250 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1251 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1252 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1253 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
/* Combined VdW force: the negated product of (fvdw6+fvdw12) and
 * (vftabscale*rinv00). */
1254 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1258 /* Update vectorial force */
1259 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1260 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1261 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1263 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1264 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1265 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1267 /**************************
1268 * CALCULATE INTERACTIONS *
1269 **************************/
/* Site pairs (1..3) x (1..3) follow: each computes only felec; no velec is
 * formed in this function. */
1271 /* REACTION-FIELD ELECTROSTATICS */
1272 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1276 /* Update vectorial force */
1277 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1278 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1279 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1281 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1282 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1283 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1285 /**************************
1286 * CALCULATE INTERACTIONS *
1287 **************************/
1289 /* REACTION-FIELD ELECTROSTATICS */
1290 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1294 /* Update vectorial force */
1295 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1296 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1297 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1299 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1300 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1301 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1303 /**************************
1304 * CALCULATE INTERACTIONS *
1305 **************************/
1307 /* REACTION-FIELD ELECTROSTATICS */
1308 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1312 /* Update vectorial force */
1313 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1314 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1315 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1317 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1318 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1319 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1321 /**************************
1322 * CALCULATE INTERACTIONS *
1323 **************************/
1325 /* REACTION-FIELD ELECTROSTATICS */
1326 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1330 /* Update vectorial force */
1331 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1332 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1333 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1335 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1336 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1337 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1339 /**************************
1340 * CALCULATE INTERACTIONS *
1341 **************************/
1343 /* REACTION-FIELD ELECTROSTATICS */
1344 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1348 /* Update vectorial force */
1349 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1350 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1351 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1353 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1354 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1355 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1357 /**************************
1358 * CALCULATE INTERACTIONS *
1359 **************************/
1361 /* REACTION-FIELD ELECTROSTATICS */
1362 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1366 /* Update vectorial force */
1367 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1368 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1369 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1371 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1372 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1373 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1375 /**************************
1376 * CALCULATE INTERACTIONS *
1377 **************************/
1379 /* REACTION-FIELD ELECTROSTATICS */
1380 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1384 /* Update vectorial force */
1385 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1386 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1387 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1389 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1390 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1391 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1393 /**************************
1394 * CALCULATE INTERACTIONS *
1395 **************************/
1397 /* REACTION-FIELD ELECTROSTATICS */
1398 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1402 /* Update vectorial force */
1403 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1404 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1405 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1407 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1408 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1409 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1411 /**************************
1412 * CALCULATE INTERACTIONS *
1413 **************************/
1415 /* REACTION-FIELD ELECTROSTATICS */
1416 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1420 /* Update vectorial force */
1421 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1422 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1423 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1425 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1426 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1427 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1429 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1431 /* Inner loop uses 324 flops */
/* Leftover single j entry: the same interactions are repeated using the 1ptr
 * load and decrement helpers, with only j_coord_offsetA in use. */
1434 if(jidx<j_index_end)
1438 j_coord_offsetA = DIM*jnrA;
1440 /* load j atom coordinates */
1441 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1442 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1443 &jy2,&jz2,&jx3,&jy3,&jz3);
1445 /* Calculate displacement vector */
1446 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1447 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1448 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1449 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1450 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1451 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1452 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1453 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1454 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1455 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1456 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1457 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1458 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1459 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1460 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1461 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1462 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1463 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1464 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1465 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1466 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1467 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1468 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1469 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1470 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1471 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1472 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1473 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1474 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1475 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1477 /* Calculate squared distance and things based on it */
1478 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1479 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1480 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1481 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1482 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1483 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1484 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1485 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1486 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1487 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1489 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1490 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1491 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1492 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1493 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1494 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1495 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1496 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1497 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1498 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1500 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1501 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1502 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1503 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1504 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1505 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1506 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1507 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1508 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1510 fjx0 = _fjsp_setzero_v2r8();
1511 fjy0 = _fjsp_setzero_v2r8();
1512 fjz0 = _fjsp_setzero_v2r8();
1513 fjx1 = _fjsp_setzero_v2r8();
1514 fjy1 = _fjsp_setzero_v2r8();
1515 fjz1 = _fjsp_setzero_v2r8();
1516 fjx2 = _fjsp_setzero_v2r8();
1517 fjy2 = _fjsp_setzero_v2r8();
1518 fjz2 = _fjsp_setzero_v2r8();
1519 fjx3 = _fjsp_setzero_v2r8();
1520 fjy3 = _fjsp_setzero_v2r8();
1521 fjz3 = _fjsp_setzero_v2r8();
1523 /**************************
1524 * CALCULATE INTERACTIONS *
1525 **************************/
1527 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1529 /* Calculate table index by multiplying r with table scale and truncate to integer */
1530 rt = _fjsp_mul_v2r8(r00,vftabscale);
1531 itab_tmp = _fjsp_dtox_v2r8(rt);
1532 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1533 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1534 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
/* Only vfconv.i[0] is used for table loads in this block; the second operand
 * of each transpose is a zero vector instead of a load at vfconv.i[1]. */
1539 /* CUBIC SPLINE TABLE DISPERSION */
1540 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1541 F = _fjsp_setzero_v2r8();
1542 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1543 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1544 H = _fjsp_setzero_v2r8();
1545 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1546 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1547 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1548 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1550 /* CUBIC SPLINE TABLE REPULSION */
1551 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1552 F = _fjsp_setzero_v2r8();
1553 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1554 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1555 H = _fjsp_setzero_v2r8();
1556 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1557 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1558 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1559 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1560 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
/* In this block fscal is combined with a zero vector through
 * _fjsp_unpacklo_v2r8 before every force update; presumably this clears the
 * unused second lane - confirm against the intrinsic reference. */
1564 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1566 /* Update vectorial force */
1567 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1568 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1569 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1571 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1572 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1573 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1575 /**************************
1576 * CALCULATE INTERACTIONS *
1577 **************************/
1579 /* REACTION-FIELD ELECTROSTATICS */
1580 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1584 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1586 /* Update vectorial force */
1587 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1588 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1589 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1591 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1592 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1593 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1595 /**************************
1596 * CALCULATE INTERACTIONS *
1597 **************************/
1599 /* REACTION-FIELD ELECTROSTATICS */
1600 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1604 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1606 /* Update vectorial force */
1607 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1608 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1609 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1611 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1612 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1613 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1615 /**************************
1616 * CALCULATE INTERACTIONS *
1617 **************************/
1619 /* REACTION-FIELD ELECTROSTATICS */
1620 felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1624 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1626 /* Update vectorial force */
1627 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1628 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1629 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1631 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1632 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1633 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1635 /**************************
1636 * CALCULATE INTERACTIONS *
1637 **************************/
1639 /* REACTION-FIELD ELECTROSTATICS */
1640 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1644 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1646 /* Update vectorial force */
1647 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1648 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1649 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1651 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1652 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1653 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1655 /**************************
1656 * CALCULATE INTERACTIONS *
1657 **************************/
1659 /* REACTION-FIELD ELECTROSTATICS */
1660 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1664 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1666 /* Update vectorial force */
1667 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1668 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1669 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1671 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1672 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1673 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1675 /**************************
1676 * CALCULATE INTERACTIONS *
1677 **************************/
1679 /* REACTION-FIELD ELECTROSTATICS */
1680 felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1684 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1686 /* Update vectorial force */
1687 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1688 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1689 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1691 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1692 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1693 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1695 /**************************
1696 * CALCULATE INTERACTIONS *
1697 **************************/
1699 /* REACTION-FIELD ELECTROSTATICS */
1700 felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1704 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1706 /* Update vectorial force */
1707 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1708 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1709 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1711 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1712 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1713 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1715 /**************************
1716 * CALCULATE INTERACTIONS *
1717 **************************/
1719 /* REACTION-FIELD ELECTROSTATICS */
1720 felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1724 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1726 /* Update vectorial force */
1727 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1728 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1729 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1731 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1732 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1733 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1735 /**************************
1736 * CALCULATE INTERACTIONS *
1737 **************************/
1739 /* REACTION-FIELD ELECTROSTATICS */
1740 felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1744 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1746 /* Update vectorial force */
1747 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1748 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1749 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1751 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1752 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1753 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1755 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1757 /* Inner loop uses 324 flops */
1760 /* End of innermost loop */
/* Hand the accumulated i-water forces to the helper, which receives both the
 * force array position of this i water and the shift-force position. */
1762 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1763 f+i_coord_offset,fshift+i_shift_offset);
1765 /* Increment number of inner iterations */
1766 inneriter += j_index_end - j_index_start;
1768 /* Outer loop uses 24 flops */
1771 /* Increment number of outer iterations */
1774 /* Update outer/inner flops */
1776 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*324);